From 9d1d31ad9498e6ceb285d5774e34fed5f648c273 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 6 Mar 2018 16:48:44 +0100 Subject: [PATCH] x86: slightly reduce Meltdown band-aid overhead I'm not sure why I didn't do this right away: By avoiding the use of global PTEs in the cloned directmap, there's no need to fiddle with CR4.PGE on any of the entry paths. Only the exit paths need to flush global mappings. The reduced flushing, however, requires that we now have interrupts off on all entry paths until after the page table switch, so that flush IPIs can't be serviced while on the restricted pagetables; servicing them there would leave a window where a potentially stale guest global mapping could be brought into the TLB. Along those lines the "sync" IPI after L4 entry updates now needs to become a real (and global) flush IPI, so that inside Xen we'll also pick up such changes. Signed-off-by: Jan Beulich Tested-by: Juergen Gross Reviewed-by: Juergen Gross Reviewed-by: Andrew Cooper --- xen/arch/x86/mm.c | 8 ++------ xen/arch/x86/smpboot.c | 10 ++++++++++ xen/arch/x86/x86_64/compat/entry.S | 5 +++-- xen/arch/x86/x86_64/entry.S | 21 ++++++++++++--------- 4 files changed, 27 insertions(+), 17 deletions(-) diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 8021f93a63..9b559448a7 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -3823,18 +3823,14 @@ long do_mmu_update( { /* * Force other vCPU-s of the affected guest to pick up L4 entry - * changes (if any). Issue a flush IPI with empty operation mask to - * facilitate this (including ourselves waiting for the IPI to - * actually have arrived). Utilize the fact that FLUSH_VA_VALID is - * meaningless without FLUSH_CACHE, but will allow to pass the no-op - * check in flush_area_mask(). + * changes (if any). 
*/ unsigned int cpu = smp_processor_id(); cpumask_t *mask = per_cpu(scratch_cpumask, cpu); cpumask_andnot(mask, pt_owner->dirty_cpumask, cpumask_of(cpu)); if ( !cpumask_empty(mask) ) - flush_area_mask(mask, ZERO_BLOCK_PTR, FLUSH_VA_VALID); + flush_mask(mask, FLUSH_TLB_GLOBAL); } perfc_add(num_page_updates, i); diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index d376c69c42..98873df429 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -739,6 +739,7 @@ static int clone_mapping(const void *ptr, root_pgentry_t *rpt) } pl1e += l1_table_offset(linear); + flags &= ~_PAGE_GLOBAL; if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT ) { @@ -1053,8 +1054,17 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if ( rc ) panic("Error %d setting up PV root page table\n", rc); if ( per_cpu(root_pgt, 0) ) + { get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0)); + /* + * All entry points which may need to switch page tables have to start + * with interrupts off. Re-write what pv_trap_init() has put there. + */ + _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3, + &int80_direct_trap); + } + set_nr_sockets(); socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets); diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S index 3e8b6c104c..bf3a7aca71 100644 --- a/xen/arch/x86/x86_64/compat/entry.S +++ b/xen/arch/x86/x86_64/compat/entry.S @@ -202,7 +202,7 @@ ENTRY(compat_post_handle_exception) /* See lstar_enter for entry register state. */ ENTRY(cstar_enter) - sti + /* sti could live here when we don't switch page tables below. */ CR4_PV32_RESTORE movq 8(%rsp),%rax /* Restore %rax. 
*/ movq $FLAT_KERNEL_SS,8(%rsp) @@ -222,9 +222,10 @@ ENTRY(cstar_enter) jz .Lcstar_cr3_okay mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) neg %rcx - write_cr3 rcx, rdi, rsi + mov %rcx, %cr3 movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) .Lcstar_cr3_okay: + sti movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx movq VCPU_domain(%rbx),%rcx diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index bf41563862..cc5b95e5c1 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -150,7 +150,7 @@ UNLIKELY_END(exit_cr3) * %ss must be saved into the space left by the trampoline. */ ENTRY(lstar_enter) - sti + /* sti could live here when we don't switch page tables below. */ movq 8(%rsp),%rax /* Restore %rax. */ movq $FLAT_KERNEL_SS,8(%rsp) pushq %r11 @@ -169,9 +169,10 @@ ENTRY(lstar_enter) jz .Llstar_cr3_okay mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) neg %rcx - write_cr3 rcx, rdi, rsi + mov %rcx, %cr3 movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) .Llstar_cr3_okay: + sti movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx testb $TF_kernel_mode,VCPU_thread_flags(%rbx) @@ -254,7 +255,7 @@ process_trap: jmp test_all_events ENTRY(sysenter_entry) - sti + /* sti could live here when we don't switch page tables below. 
*/ pushq $FLAT_USER_SS pushq $0 pushfq @@ -275,9 +276,10 @@ GLOBAL(sysenter_eflags_saved) jz .Lsyse_cr3_okay mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) neg %rcx - write_cr3 rcx, rdi, rsi + mov %rcx, %cr3 movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) .Lsyse_cr3_okay: + sti movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx cmpb $0,VCPU_sysenter_disables_events(%rbx) @@ -324,9 +326,10 @@ ENTRY(int80_direct_trap) jz .Lint80_cr3_okay mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) neg %rcx - write_cr3 rcx, rdi, rsi + mov %rcx, %cr3 movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) .Lint80_cr3_okay: + sti cmpb $0,untrusted_msi(%rip) UNLIKELY_START(ne, msi_check) @@ -534,7 +537,7 @@ ENTRY(common_interrupt) mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) neg %rcx .Lintr_cr3_load: - write_cr3 rcx, rdi, rsi + mov %rcx, %cr3 xor %ecx, %ecx mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) testb $3, UREGS_cs(%rsp) @@ -576,7 +579,7 @@ GLOBAL(handle_exception) mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) neg %rcx .Lxcpt_cr3_load: - write_cr3 rcx, rdi, rsi + mov %rcx, %cr3 xor %ecx, %ecx mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) testb $3, UREGS_cs(%rsp) @@ -772,7 +775,7 @@ ENTRY(double_fault) jns .Ldblf_cr3_load neg %rbx .Ldblf_cr3_load: - write_cr3 rbx, rdi, rsi + mov %rbx, %cr3 .Ldblf_cr3_okay: movq %rsp,%rdi @@ -807,7 +810,7 @@ handle_ist_exception: mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) neg %rcx .List_cr3_load: - write_cr3 rcx, rdi, rsi + mov %rcx, %cr3 movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14) .List_cr3_okay: -- 2.30.2