/******************************************************************************
* machine_kexec.c
*
+ * Copyright (C) 2013 Citrix Systems R&D Ltd.
+ *
+ * Portions derived from Linux's arch/x86/kernel/machine_kexec_64.c.
+ *
+ * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
+ *
* Xen port written by:
* - Simon 'Horms' Horman <horms@verge.net.au>
* - Magnus Damm <magnus@valinux.co.jp>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
*/
#include <xen/types.h>
#include <xen/guest_access.h>
#include <asm/fixmap.h>
#include <asm/hpet.h>
+#include <asm/page.h>
+#include <asm/machine_kexec.h>
-typedef void (*relocate_new_kernel_t)(
- unsigned long indirection_page,
- unsigned long *page_list,
- unsigned long start_address,
- unsigned int preserve_context);
-
-int machine_kexec_load(int type, int slot, xen_kexec_image_t *image)
+/*
+ * Add a mapping for a page to the page tables used during kexec.
+ */
+int machine_kexec_add_page(struct kexec_image *image, unsigned long vaddr,
+ unsigned long maddr)
{
- unsigned long prev_ma = 0;
- int fix_base = FIX_KEXEC_BASE_0 + (slot * (KEXEC_XEN_NO_PAGES >> 1));
- int k;
+ struct page_info *l4_page;
+ struct page_info *l3_page;
+ struct page_info *l2_page;
+ struct page_info *l1_page;
+ l4_pgentry_t *l4 = NULL;
+ l3_pgentry_t *l3 = NULL;
+ l2_pgentry_t *l2 = NULL;
+ l1_pgentry_t *l1 = NULL;
+ int ret = -ENOMEM;
+
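+ /* The top-level (L4) table is kept in image->aux_page so repeated calls
+ * extend the same set of kexec page tables. */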
+ l4_page = image->aux_page;
+ if ( !l4_page )
+ {
+ l4_page = kimage_alloc_control_page(image, 0);
+ if ( !l4_page )
+ goto out;
+ image->aux_page = l4_page;
+ }
- /* setup fixmap to point to our pages and record the virtual address
- * in every odd index in page_list[].
- */
+ l4 = __map_domain_page(l4_page);
+ l4 += l4_table_offset(vaddr);
+ if ( !(l4e_get_flags(*l4) & _PAGE_PRESENT) )
+ {
+ l3_page = kimage_alloc_control_page(image, 0);
+ if ( !l3_page )
+ goto out;
+ l4e_write(l4, l4e_from_page(l3_page, __PAGE_HYPERVISOR));
+ }
+ else
+ l3_page = l4e_get_page(*l4);
+
+ l3 = __map_domain_page(l3_page);
+ l3 += l3_table_offset(vaddr);
+ if ( !(l3e_get_flags(*l3) & _PAGE_PRESENT) )
+ {
+ l2_page = kimage_alloc_control_page(image, 0);
+ if ( !l2_page )
+ goto out;
+ l3e_write(l3, l3e_from_page(l2_page, __PAGE_HYPERVISOR));
+ }
+ else
+ l2_page = l3e_get_page(*l3);
+
+ l2 = __map_domain_page(l2_page);
+ l2 += l2_table_offset(vaddr);
+ if ( !(l2e_get_flags(*l2) & _PAGE_PRESENT) )
+ {
+ l1_page = kimage_alloc_control_page(image, 0);
+ if ( !l1_page )
+ goto out;
+ l2e_write(l2, l2e_from_page(l1_page, __PAGE_HYPERVISOR));
+ }
+ else
+ l1_page = l2e_get_page(*l2);
+
+ l1 = __map_domain_page(l1_page);
+ l1 += l1_table_offset(vaddr);
+ l1e_write(l1, l1e_from_pfn(maddr >> PAGE_SHIFT, __PAGE_HYPERVISOR));
+
+ ret = 0;
+out:
+ if ( l1 )
+ unmap_domain_page(l1);
+ if ( l2 )
+ unmap_domain_page(l2);
+ if ( l3 )
+ unmap_domain_page(l3);
+ if ( l4 )
+ unmap_domain_page(l4);
+ return ret;
+}
- for ( k = 0; k < KEXEC_XEN_NO_PAGES; k++ )
+int machine_kexec_load(struct kexec_image *image)
+{
+ void *code_page;
+ int ret;
+
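+ /* Only 32-bit (EM_386) and 64-bit (EM_X86_64) x86 images can be loaded. */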
+ switch ( image->arch )
{
- if ( (k & 1) == 0 )
- {
- /* Even pages: machine address. */
- prev_ma = image->page_list[k];
- }
- else
- {
- /* Odd pages: va for previous ma. */
- if ( is_pv_32on64_domain(dom0) )
- {
- /*
- * The compatability bounce code sets up a page table
- * with a 1-1 mapping of the first 1G of memory so
- * VA==PA here.
- *
- * This Linux purgatory code still sets up separate
- * high and low mappings on the control page (entries
- * 0 and 1) but it is harmless if they are equal since
- * that PT is not live at the time.
- */
- image->page_list[k] = prev_ma;
- }
- else
- {
- set_fixmap(fix_base + (k >> 1), prev_ma);
- image->page_list[k] = fix_to_virt(fix_base + (k >> 1));
- }
- }
+ case EM_386:
+ case EM_X86_64:
+ break;
+ default:
+ return -EINVAL;
}
+ code_page = __map_domain_page(image->control_code_page);
+ memcpy(code_page, kexec_reloc, kexec_reloc_size);
+ unmap_domain_page(code_page);
+
+ /*
+ * Add a mapping for the control code page to the same virtual
+ * address as kexec_reloc. This allows us to keep running after
+ * these page tables are loaded in kexec_reloc.
+ */
+ ret = machine_kexec_add_page(image, (unsigned long)kexec_reloc,
+ page_to_maddr(image->control_code_page));
+ if ( ret < 0 )
+ return ret;
+
return 0;
}
-void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image)
+void machine_kexec_unload(struct kexec_image *image)
{
+ /* no-op. kimage_free() frees all control pages. */
}
-void machine_reboot_kexec(xen_kexec_image_t *image)
+void machine_reboot_kexec(struct kexec_image *image)
{
BUG_ON(smp_processor_id() != 0);
smp_send_stop();
BUG();
}
-void machine_kexec(xen_kexec_image_t *image)
+void machine_kexec(struct kexec_image *image)
{
- struct desc_ptr gdt_desc = {
- .base = (unsigned long)(boot_cpu_gdt_table - FIRST_RESERVED_GDT_ENTRY),
- .limit = LAST_RESERVED_GDT_BYTE
- };
int i;
+ unsigned long reloc_flags = 0;
/* We are about to permanently jump out of the Xen context into the kexec
 * purgatory code, so we really don't want to still be servicing interrupts.
 * Explicitly enable NMIs on this CPU: some crashdump kernels do not like
 * running with NMIs disabled. */
enable_nmis();
- /*
- * compat_machine_kexec() returns to idle pagetables, which requires us
- * to be running on a static GDT mapping (idle pagetables have no GDT
- * mappings in their per-domain mapping area).
- */
- asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
+ if ( image->arch == EM_386 )
+ reloc_flags |= KEXEC_RELOC_FLAG_COMPAT;
- if ( is_pv_32on64_domain(dom0) )
- {
- compat_machine_kexec(image->page_list[1],
- image->indirection_page,
- image->page_list,
- image->start_address);
- }
- else
- {
- relocate_new_kernel_t rnk;
-
- rnk = (relocate_new_kernel_t) image->page_list[1];
- (*rnk)(image->indirection_page, image->page_list,
- image->start_address,
- 0 /* preserve_context */);
- }
+ kexec_reloc(page_to_maddr(image->control_code_page),
+ page_to_maddr(image->aux_page),
+ image->head, image->entry_maddr, reloc_flags);
}
int machine_kexec_get(xen_kexec_range_t *range)
obj-y += mmconfig_64.o
obj-y += mmconfig-shared.o
obj-y += compat.o
-obj-bin-y += compat_kexec.o
obj-y += domain.o
obj-y += physdev.o
obj-y += platform_hypercall.o
obj-y += cpu_idle.o
obj-y += cpufreq.o
+obj-bin-y += kexec_reloc.o
obj-$(crash_debug) += gdbstub.o
+++ /dev/null
-/*
- * Compatibility kexec handler.
- */
-
-/*
- * NOTE: We rely on Xen not relocating itself above the 4G boundary. This is
- * currently true but if it ever changes then compat_pg_table will
- * need to be moved back below 4G at run time.
- */
-
-#include <xen/config.h>
-
-#include <asm/asm_defns.h>
-#include <asm/msr.h>
-#include <asm/page.h>
-
-/* The unrelocated physical address of a symbol. */
-#define SYM_PHYS(sym) ((sym) - __XEN_VIRT_START)
-
-/* Load physical address of symbol into register and relocate it. */
-#define RELOCATE_SYM(sym,reg) mov $SYM_PHYS(sym), reg ; \
- add xen_phys_start(%rip), reg
-
-/*
- * Relocate a physical address in memory. Size of temporary register
- * determines size of the value to relocate.
- */
-#define RELOCATE_MEM(addr,reg) mov addr(%rip), reg ; \
- add xen_phys_start(%rip), reg ; \
- mov reg, addr(%rip)
-
- .text
-
- .code64
-
-ENTRY(compat_machine_kexec)
- /* x86/64 x86/32 */
- /* %rdi - relocate_new_kernel_t CALL */
- /* %rsi - indirection page 4(%esp) */
- /* %rdx - page_list 8(%esp) */
- /* %rcx - start address 12(%esp) */
- /* cpu has pae 16(%esp) */
-
- /* Shim the 64 bit page_list into a 32 bit page_list. */
- mov $12,%r9
- lea compat_page_list(%rip), %rbx
-1: dec %r9
- movl (%rdx,%r9,8),%eax
- movl %eax,(%rbx,%r9,4)
- test %r9,%r9
- jnz 1b
-
- RELOCATE_SYM(compat_page_list,%rdx)
-
- /* Relocate compatibility mode entry point address. */
- RELOCATE_MEM(compatibility_mode_far,%eax)
-
- /* Relocate compat_pg_table. */
- RELOCATE_MEM(compat_pg_table, %rax)
- RELOCATE_MEM(compat_pg_table+0x8, %rax)
- RELOCATE_MEM(compat_pg_table+0x10,%rax)
- RELOCATE_MEM(compat_pg_table+0x18,%rax)
-
- /*
- * Setup an identity mapped region in PML4[0] of idle page
- * table.
- */
- RELOCATE_SYM(l3_identmap,%rax)
- or $0x63,%rax
- mov %rax, idle_pg_table(%rip)
-
- /* Switch to idle page table. */
- RELOCATE_SYM(idle_pg_table,%rax)
- movq %rax, %cr3
-
- /* Switch to identity mapped compatibility stack. */
- RELOCATE_SYM(compat_stack,%rax)
- movq %rax, %rsp
-
- /* Save xen_phys_start for 32 bit code. */
- movq xen_phys_start(%rip), %rbx
-
- /* Jump to low identity mapping in compatibility mode. */
- ljmp *compatibility_mode_far(%rip)
- ud2
-
-compatibility_mode_far:
- .long SYM_PHYS(compatibility_mode)
- .long __HYPERVISOR_CS32
-
- /*
- * We use 5 words of stack for the arguments passed to the kernel. The
- * kernel only uses 1 word before switching to its own stack. Allocate
- * 16 words to give "plenty" of room.
- */
- .fill 16,4,0
-compat_stack:
-
- .code32
-
-#undef RELOCATE_SYM
-#undef RELOCATE_MEM
-
-/*
- * Load physical address of symbol into register and relocate it. %rbx
- * contains xen_phys_start(%rip) saved before jump to compatibility
- * mode.
- */
-#define RELOCATE_SYM(sym,reg) mov $SYM_PHYS(sym), reg ; \
- add %ebx, reg
-
-compatibility_mode:
- /* Setup some sane segments. */
- movl $__HYPERVISOR_DS32, %eax
- movl %eax, %ds
- movl %eax, %es
- movl %eax, %fs
- movl %eax, %gs
- movl %eax, %ss
-
- /* Push arguments onto stack. */
- pushl $0 /* 20(%esp) - preserve context */
- pushl $1 /* 16(%esp) - cpu has pae */
- pushl %ecx /* 12(%esp) - start address */
- pushl %edx /* 8(%esp) - page list */
- pushl %esi /* 4(%esp) - indirection page */
- pushl %edi /* 0(%esp) - CALL */
-
- /* Disable paging and therefore leave 64 bit mode. */
- movl %cr0, %eax
- andl $~X86_CR0_PG, %eax
- movl %eax, %cr0
-
- /* Switch to 32 bit page table. */
- RELOCATE_SYM(compat_pg_table, %eax)
- movl %eax, %cr3
-
- /* Clear MSR_EFER[LME], disabling long mode */
- movl $MSR_EFER,%ecx
- rdmsr
- btcl $_EFER_LME,%eax
- wrmsr
-
- /* Re-enable paging, but only 32 bit mode now. */
- movl %cr0, %eax
- orl $X86_CR0_PG, %eax
- movl %eax, %cr0
- jmp 1f
-1:
-
- popl %eax
- call *%eax
- ud2
-
- .data
- .align 4
-compat_page_list:
- .fill 12,4,0
-
- .align 32,0
-
- /*
- * These compat page tables contain an identity mapping of the
- * first 4G of the physical address space.
- */
-compat_pg_table:
- .long SYM_PHYS(compat_pg_table_l2) + 0*PAGE_SIZE + 0x01, 0
- .long SYM_PHYS(compat_pg_table_l2) + 1*PAGE_SIZE + 0x01, 0
- .long SYM_PHYS(compat_pg_table_l2) + 2*PAGE_SIZE + 0x01, 0
- .long SYM_PHYS(compat_pg_table_l2) + 3*PAGE_SIZE + 0x01, 0
-
- .section .data.page_aligned, "aw", @progbits
- .align PAGE_SIZE,0
-compat_pg_table_l2:
- .macro identmap from=0, count=512
- .if \count-1
- identmap "(\from+0)","(\count/2)"
- identmap "(\from+(0x200000*(\count/2)))","(\count/2)"
- .else
- .quad 0x00000000000000e3 + \from
- .endif
- .endm
-
- identmap 0x00000000
- identmap 0x40000000
- identmap 0x80000000
- identmap 0xc0000000
--- /dev/null
+/*
+ * Relocate a kexec_image to its destination and call it.
+ *
+ * Copyright (C) 2013 Citrix Systems R&D Ltd.
+ *
+ * Portions derived from Linux's arch/x86/kernel/relocate_kernel_64.S.
+ *
+ * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+#include <xen/config.h>
+#include <xen/kimage.h>
+
+#include <asm/asm_defns.h>
+#include <asm/msr.h>
+#include <asm/page.h>
+#include <asm/machine_kexec.h>
+
+ .text
+ .align PAGE_SIZE
+ .code64
+
+ENTRY(kexec_reloc)
+ /* %rdi - code page maddr */
+ /* %rsi - page table maddr */
+ /* %rdx - indirection page maddr */
+ /* %rcx - entry maddr (%rbp) */
+ /* %r8 - flags */
+
+ movq %rcx, %rbp
+
+ /* Set up a stack in the identity-mapped control code page; it is not
+ * used until after the switch to the kexec page tables below. */
+ leaq (reloc_stack - kexec_reloc)(%rdi), %rsp
+
+ /* Load reloc page table. */
+ movq %rsi, %cr3
+
+ /* Jump to identity mapped code. */
+ leaq (identity_mapped - kexec_reloc)(%rdi), %rax
+ jmpq *%rax
+
+identity_mapped:
+ /*
+ * Set cr0 to a known state:
+ * - Paging enabled
+ * - Alignment check disabled
+ * - Write protect disabled
+ * - No task switch
+ * - Don't do FP software emulation.
+ * - Protected mode enabled
+ */
+ movq %cr0, %rax
+ andl $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %eax
+ orl $(X86_CR0_PG | X86_CR0_PE), %eax
+ movq %rax, %cr0
+
+ /*
+ * Set cr4 to a known state:
+ * - physical address extension enabled
+ */
+ movl $X86_CR4_PAE, %eax
+ movq %rax, %cr4
+
+ movq %rdx, %rdi
+ call relocate_pages
+
+ /* Need to switch to 32-bit mode? */
+ testq $KEXEC_RELOC_FLAG_COMPAT, %r8
+ jnz call_32_bit
+
+call_64_bit:
+ /* Call the image entry point. This should never return. */
+ callq *%rbp
+ ud2
+
+call_32_bit:
+ /* Setup IDT. */
+ lidt compat_mode_idt(%rip)
+
+ /* Load compat GDT. */
+ leaq compat_mode_gdt(%rip), %rax
+ movq %rax, (compat_mode_gdt_desc + 2)(%rip)
+ lgdt compat_mode_gdt_desc(%rip)
+
+ /* Relocate compatibility mode entry point address. */
+ leal compatibility_mode(%rip), %eax
+ movl %eax, compatibility_mode_far(%rip)
+
+ /* Enter compatibility mode. */
+ ljmp *compatibility_mode_far(%rip)
+
+relocate_pages:
+ /* %rdi - indirection page maddr */
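+ /*
+ * Each entry in the indirection list is a machine address with an IND_*
+ * flag in its low bits: IND_DESTINATION sets the current destination
+ * page, IND_INDIRECTION continues with another indirection page,
+ * IND_SOURCE copies one source page to the destination, IND_ZERO clears
+ * the destination page, and IND_DONE ends the list.
+ */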
+ pushq %rbx
+
+ cld
+ movq %rdi, %rbx
+ xorl %edi, %edi
+ xorl %esi, %esi
+
+next_entry: /* top, read another word from the indirection page */
+
+ movq (%rbx), %rcx
+ addq $8, %rbx
+is_dest:
+ testb $IND_DESTINATION, %cl
+ jz is_ind
+ movq %rcx, %rdi
+ andq $PAGE_MASK, %rdi
+ jmp next_entry
+is_ind:
+ testb $IND_INDIRECTION, %cl
+ jz is_done
+ movq %rcx, %rbx
+ andq $PAGE_MASK, %rbx
+ jmp next_entry
+is_done:
+ testb $IND_DONE, %cl
+ jnz done
+is_source:
+ testb $IND_SOURCE, %cl
+ jz is_zero
+ movq %rcx, %rsi /* For every source page do a copy */
+ andq $PAGE_MASK, %rsi
+ movl $(PAGE_SIZE / 8), %ecx
+ rep movsq
+ jmp next_entry
+is_zero:
+ testb $IND_ZERO, %cl
+ jz next_entry
+ movl $(PAGE_SIZE / 8), %ecx /* Zero the destination page. */
+ xorl %eax, %eax
+ rep stosq
+ jmp next_entry
+done:
+ popq %rbx
+ ret
+
+ .code32
+
+compatibility_mode:
+ /* Setup some sane segments. */
+ movl $0x0008, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %eax, %ss
+
+ /* Disable paging and therefore leave 64 bit mode. */
+ movl %cr0, %eax
+ andl $~X86_CR0_PG, %eax
+ movl %eax, %cr0
+
+ /* Disable long mode */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ andl $~EFER_LME, %eax
+ wrmsr
+
+ /* Clear cr4 to disable PAE. */
+ xorl %eax, %eax
+ movl %eax, %cr4
+
+ /* Call the image entry point. This should never return. */
+ call *%ebp
+ ud2
+
+ .align 4
+compatibility_mode_far:
+ .long 0x00000000 /* set in call_32_bit above */
+ .word 0x0010
+
+compat_mode_gdt_desc:
+ .word (3*8)-1
+ .quad 0x0000000000000000 /* set in call_32_bit above */
+
+ .align 8
+compat_mode_gdt:
+ .quad 0x0000000000000000 /* null */
+ .quad 0x00cf92000000ffff /* 0x0008 ring 0 data */
+ .quad 0x00cf9a000000ffff /* 0x0010 ring 0 code, compatibility */
+
+compat_mode_idt:
+ .word 0 /* limit */
+ .long 0 /* base */
+
+ /*
+ * 16 quadwords (128 bytes) of stack are more than enough.
+ */
+ .fill 16,8,0
+reloc_stack:
+
+ .globl kexec_reloc_size
+kexec_reloc_size:
+ .long . - kexec_reloc
#include <xen/version.h>
#include <xen/console.h>
#include <xen/kexec.h>
+#include <xen/kimage.h>
#include <public/elfnote.h>
#include <xsm/xsm.h>
#include <xen/cpu.h>
static cpumask_t crash_saved_cpus;
-static xen_kexec_image_t kexec_image[KEXEC_IMAGE_NR];
+static struct kexec_image *kexec_image[KEXEC_IMAGE_NR];
#define KEXEC_FLAG_DEFAULT_POS (KEXEC_IMAGE_NR + 0)
#define KEXEC_FLAG_CRASH_POS (KEXEC_IMAGE_NR + 1)
static unsigned long kexec_flags = 0; /* the lowest bits are for KEXEC_IMAGE... */
-static spinlock_t kexec_lock = SPIN_LOCK_UNLOCKED;
-
static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
static size_t vmcoreinfo_size = 0;
kexec_common_shutdown();
kexec_crash_save_cpu();
machine_crash_shutdown();
- machine_kexec(&kexec_image[KEXEC_IMAGE_CRASH_BASE + pos]);
+ machine_kexec(kexec_image[KEXEC_IMAGE_CRASH_BASE + pos]);
BUG();
}
static long kexec_reboot(void *_image)
{
- xen_kexec_image_t *image = _image;
+ struct kexec_image *image = _image;
kexecing = TRUE;
#endif
}
-static int kexec_load_unload_internal(unsigned long op, xen_kexec_load_v1_t *load)
+static void kexec_unload_image(struct kexec_image *image)
{
- xen_kexec_image_t *image;
+ if ( !image )
+ return;
+
+ machine_kexec_unload(image);
+ kimage_free(image);
+}
+
+static int kexec_exec(XEN_GUEST_HANDLE_PARAM(void) uarg)
+{
+ xen_kexec_exec_t exec;
+ struct kexec_image *image;
+ int base, bit, pos, ret = -EINVAL;
+
+ if ( unlikely(copy_from_guest(&exec, uarg, 1)) )
+ return -EFAULT;
+
+ if ( kexec_load_get_bits(exec.type, &base, &bit) )
+ return -EINVAL;
+
+ pos = (test_bit(bit, &kexec_flags) != 0);
+
+ /* Only allow kexec/kdump into loaded images */
+ if ( !test_bit(base + pos, &kexec_flags) )
+ return -ENOENT;
+
+ switch (exec.type)
+ {
+ case KEXEC_TYPE_DEFAULT:
+ image = kexec_image[base + pos];
+ ret = continue_hypercall_on_cpu(0, kexec_reboot, image);
+ break;
+ case KEXEC_TYPE_CRASH:
+ kexec_crash(); /* Does not return */
+ break;
+ }
+
+ return -EINVAL; /* never reached */
+}
+
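+/*
+ * Install a new image in the slot for this type and return the image it
+ * replaces (if any) via *old so the caller can unload and free it.  Pass
+ * new == NULL to simply unload the current image.  Fails with -EBUSY if
+ * a kexec is already in progress.
+ */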
+static int kexec_swap_images(int type, struct kexec_image *new,
+ struct kexec_image **old)
+{
+ static DEFINE_SPINLOCK(kexec_lock);
int base, bit, pos;
- int ret = 0;
+ int new_slot, old_slot;
+
+ *old = NULL;
+
+ spin_lock(&kexec_lock);
+
+ if ( test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
+ {
+ spin_unlock(&kexec_lock);
+ return -EBUSY;
+ }
- if ( kexec_load_get_bits(load->type, &base, &bit) )
+ if ( kexec_load_get_bits(type, &base, &bit) )
return -EINVAL;
pos = (test_bit(bit, &kexec_flags) != 0);
+ old_slot = base + pos;
+ new_slot = base + !pos;
- /* Load the user data into an unused image */
- if ( op == KEXEC_CMD_kexec_load )
+ if ( new )
{
- image = &kexec_image[base + !pos];
+ kexec_image[new_slot] = new;
+ set_bit(new_slot, &kexec_flags);
+ }
+ change_bit(bit, &kexec_flags);
- BUG_ON(test_bit((base + !pos), &kexec_flags)); /* must be free */
+ clear_bit(old_slot, &kexec_flags);
+ *old = kexec_image[old_slot];
- memcpy(image, &load->image, sizeof(*image));
+ spin_unlock(&kexec_lock);
- if ( !(ret = machine_kexec_load(load->type, base + !pos, image)) )
- {
- /* Set image present bit */
- set_bit((base + !pos), &kexec_flags);
+ return 0;
+}
- /* Make new image the active one */
- change_bit(bit, &kexec_flags);
- }
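+/*
+ * Load a freshly built image into the spare slot for its type, making it
+ * the active image and unloading the one it replaces.
+ */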
+static int kexec_load_slot(struct kexec_image *kimage)
+{
+ struct kexec_image *old_kimage;
+ int ret = -ENOMEM;
+
+ ret = machine_kexec_load(kimage);
+ if ( ret < 0 )
+ return ret;
+
+ crash_save_vmcoreinfo();
+
+ ret = kexec_swap_images(kimage->type, kimage, &old_kimage);
+ if ( ret < 0 )
+ return ret;
+
+ kexec_unload_image(old_kimage);
+
+ return 0;
+}
+
+static uint16_t kexec_load_v1_arch(void)
+{
+#ifdef CONFIG_X86
+ return is_pv_32on64_domain(dom0) ? EM_386 : EM_X86_64;
+#else
+ return EM_NONE;
+#endif
+}
- crash_save_vmcoreinfo();
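+/*
+ * Record a destination page in the segment list, starting a new (empty)
+ * segment unless the page is contiguous with the previous one.  Segment
+ * sizes are grown by the caller as IND_SOURCE pages are encountered.
+ */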
+static int kexec_segments_add_segment(
+ unsigned int *nr_segments, xen_kexec_segment_t *segments,
+ unsigned long mfn)
+{
+ paddr_t maddr = (paddr_t)mfn << PAGE_SHIFT;
+ unsigned int n = *nr_segments;
+
+ /* Need a new segment? */
+ if ( n == 0
+ || segments[n-1].dest_maddr + segments[n-1].dest_size != maddr )
+ {
+ n++;
+ if ( n > KEXEC_SEGMENT_MAX )
+ return -EINVAL;
+ *nr_segments = n;
+
+ set_xen_guest_handle(segments[n-1].buf.h, NULL);
+ segments[n-1].buf_size = 0;
+ segments[n-1].dest_maddr = maddr;
+ segments[n-1].dest_size = 0;
}
- /* Unload the old image if present and load successful */
- if ( ret == 0 && !test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
+ return 0;
+}
+
+static int kexec_segments_from_ind_page(unsigned long mfn,
+ unsigned int *nr_segments,
+ xen_kexec_segment_t *segments,
+ bool_t compat)
+{
+ void *page;
+ kimage_entry_t *entry;
+ int ret = 0;
+
+ page = map_domain_page(mfn);
+
+ /*
+ * Walk the indirection page list, adding destination pages to the
+ * segments.
+ */
+ for ( entry = page; ; )
{
- if ( test_and_clear_bit((base + pos), &kexec_flags) )
+ unsigned long ind;
+
+ ind = kimage_entry_ind(entry, compat);
+ mfn = kimage_entry_mfn(entry, compat);
+
+ switch ( ind )
{
- image = &kexec_image[base + pos];
- machine_kexec_unload(load->type, base + pos, image);
+ case IND_DESTINATION:
+ ret = kexec_segments_add_segment(nr_segments, segments, mfn);
+ if ( ret < 0 )
+ goto done;
+ break;
+ case IND_INDIRECTION:
+ unmap_domain_page(page);
+ entry = page = map_domain_page(mfn);
+ continue;
+ case IND_DONE:
+ goto done;
+ case IND_SOURCE:
+ if ( *nr_segments == 0 )
+ {
+ ret = -EINVAL;
+ goto done;
+ }
+ segments[*nr_segments-1].dest_size += PAGE_SIZE;
+ break;
+ default:
+ ret = -EINVAL;
+ goto done;
}
+ entry = kimage_entry_next(entry, compat);
}
+done:
+ unmap_domain_page(page);
+ return ret;
+}
+static int kexec_do_load_v1(xen_kexec_load_v1_t *load, int compat)
+{
+ struct kexec_image *kimage = NULL;
+ xen_kexec_segment_t *segments;
+ uint16_t arch;
+ unsigned int nr_segments = 0;
+ unsigned long ind_mfn = load->image.indirection_page >> PAGE_SHIFT;
+ int ret;
+
+ arch = kexec_load_v1_arch();
+ if ( arch == EM_NONE )
+ return -ENOSYS;
+
+ segments = xmalloc_array(xen_kexec_segment_t, KEXEC_SEGMENT_MAX);
+ if ( segments == NULL )
+ return -ENOMEM;
+
+ /*
+ * Work out the image segments (destination only) from the
+ * indirection pages.
+ *
+ * This is needed so we don't allocate pages that will overlap
+ * with the destination when building the new set of indirection
+ * pages below.
+ */
+ ret = kexec_segments_from_ind_page(ind_mfn, &nr_segments, segments, compat);
+ if ( ret < 0 )
+ goto error;
+
+ ret = kimage_alloc(&kimage, load->type, arch, load->image.start_address,
+ nr_segments, segments);
+ if ( ret < 0 )
+ goto error;
+
+ /*
+ * Build a new set of indirection pages in the native format.
+ *
+ * This walks the guest-provided indirection pages a second time.
+ * The guest could have altered them in the meantime, invalidating the
+ * segment information constructed above.  At worst this makes the
+ * resulting image unrelocatable.
+ */
+ ret = kimage_build_ind(kimage, ind_mfn, compat);
+ if ( ret < 0 )
+ goto error;
+
+ ret = kexec_load_slot(kimage);
+ if ( ret < 0 )
+ goto error;
+
+ return 0;
+
+error:
+ if ( !kimage )
+ xfree(segments);
+ kimage_free(kimage);
return ret;
}
-static int kexec_load_unload(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) uarg)
+static int kexec_load_v1(XEN_GUEST_HANDLE_PARAM(void) uarg)
{
xen_kexec_load_v1_t load;
if ( unlikely(copy_from_guest(&load, uarg, 1)) )
return -EFAULT;
- return kexec_load_unload_internal(op, &load);
+ return kexec_do_load_v1(&load, 0);
}
-static int kexec_load_unload_compat(unsigned long op,
- XEN_GUEST_HANDLE_PARAM(void) uarg)
+static int kexec_load_v1_compat(XEN_GUEST_HANDLE_PARAM(void) uarg)
{
#ifdef CONFIG_COMPAT
compat_kexec_load_v1_t compat_load;
load.type = compat_load.type;
XLAT_kexec_image(&load.image, &compat_load.image);
- return kexec_load_unload_internal(op, &load);
-#else /* CONFIG_COMPAT */
+ return kexec_do_load_v1(&load, 1);
+#else
return 0;
-#endif /* CONFIG_COMPAT */
+#endif
}
-static int kexec_exec(XEN_GUEST_HANDLE_PARAM(void) uarg)
+static int kexec_load(XEN_GUEST_HANDLE_PARAM(void) uarg)
{
- xen_kexec_exec_t exec;
- xen_kexec_image_t *image;
- int base, bit, pos, ret = -EINVAL;
+ xen_kexec_load_t load;
+ xen_kexec_segment_t *segments;
+ struct kexec_image *kimage = NULL;
+ int ret;
- if ( unlikely(copy_from_guest(&exec, uarg, 1)) )
+ if ( copy_from_guest(&load, uarg, 1) )
return -EFAULT;
- if ( kexec_load_get_bits(exec.type, &base, &bit) )
+ if ( load.nr_segments >= KEXEC_SEGMENT_MAX )
return -EINVAL;
- pos = (test_bit(bit, &kexec_flags) != 0);
-
- /* Only allow kexec/kdump into loaded images */
- if ( !test_bit(base + pos, &kexec_flags) )
- return -ENOENT;
+ segments = xmalloc_array(xen_kexec_segment_t, load.nr_segments);
+ if ( segments == NULL )
+ return -ENOMEM;
- switch (exec.type)
+ if ( copy_from_guest(segments, load.segments.h, load.nr_segments) )
{
- case KEXEC_TYPE_DEFAULT:
- image = &kexec_image[base + pos];
- ret = continue_hypercall_on_cpu(0, kexec_reboot, image);
- break;
- case KEXEC_TYPE_CRASH:
- kexec_crash(); /* Does not return */
- break;
+ ret = -EFAULT;
+ goto error;
}
- return -EINVAL; /* never reached */
+ ret = kimage_alloc(&kimage, load.type, load.arch, load.entry_maddr,
+ load.nr_segments, segments);
+ if ( ret < 0 )
+ goto error;
+
+ ret = kimage_load_segments(kimage);
+ if ( ret < 0 )
+ goto error;
+
+ ret = kexec_load_slot(kimage);
+ if ( ret < 0 )
+ goto error;
+
+ return 0;
+
+error:
+ if ( !kimage )
+ xfree(segments);
+ kimage_free(kimage);
+ return ret;
+}
+
+static int kexec_do_unload(xen_kexec_unload_t *unload)
+{
+ struct kexec_image *old_kimage;
+ int ret;
+
+ ret = kexec_swap_images(unload->type, NULL, &old_kimage);
+ if ( ret < 0 )
+ return ret;
+
+ kexec_unload_image(old_kimage);
+
+ return 0;
+}
+
+static int kexec_unload_v1(XEN_GUEST_HANDLE_PARAM(void) uarg)
+{
+ xen_kexec_load_v1_t load;
+ xen_kexec_unload_t unload;
+
+ if ( copy_from_guest(&load, uarg, 1) )
+ return -EFAULT;
+
+ unload.type = load.type;
+ return kexec_do_unload(&unload);
+}
+
+static int kexec_unload_v1_compat(XEN_GUEST_HANDLE_PARAM(void) uarg)
+{
+#ifdef CONFIG_COMPAT
+ compat_kexec_load_v1_t compat_load;
+ xen_kexec_unload_t unload;
+
+ if ( copy_from_guest(&compat_load, uarg, 1) )
+ return -EFAULT;
+
+ unload.type = compat_load.type;
+ return kexec_do_unload(&unload);
+#else
+ return 0;
+#endif
+}
+
+static int kexec_unload(XEN_GUEST_HANDLE_PARAM(void) uarg)
+{
+ xen_kexec_unload_t unload;
+
+ if ( unlikely(copy_from_guest(&unload, uarg, 1)) )
+ return -EFAULT;
+
+ return kexec_do_unload(&unload);
}
static int do_kexec_op_internal(unsigned long op,
XEN_GUEST_HANDLE_PARAM(void) uarg,
bool_t compat)
{
- unsigned long flags;
int ret = -EINVAL;
ret = xsm_kexec(XSM_PRIV);
ret = kexec_get_range(uarg);
break;
case KEXEC_CMD_kexec_load_v1:
+ if ( compat )
+ ret = kexec_load_v1_compat(uarg);
+ else
+ ret = kexec_load_v1(uarg);
+ break;
case KEXEC_CMD_kexec_unload_v1:
- spin_lock_irqsave(&kexec_lock, flags);
- if (!test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags))
- {
- if (compat)
- ret = kexec_load_unload_compat(op, uarg);
- else
- ret = kexec_load_unload(op, uarg);
- }
- spin_unlock_irqrestore(&kexec_lock, flags);
+ if ( compat )
+ ret = kexec_unload_v1_compat(uarg);
+ else
+ ret = kexec_unload_v1(uarg);
break;
case KEXEC_CMD_kexec:
ret = kexec_exec(uarg);
break;
+ case KEXEC_CMD_kexec_load:
+ ret = kexec_load(uarg);
+ break;
+ case KEXEC_CMD_kexec_unload:
+ ret = kexec_unload(uarg);
+ break;
}
return ret;
image->control_code_page = kimage_alloc_control_page(image, MEMF_bits(32));
if ( !image->control_code_page )
goto out;
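+ /* Identity-map the control code page so the relocation code can keep
+ * running once the kexec page tables are loaded. */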
+ result = machine_kexec_add_page(image,
+ page_to_maddr(image->control_code_page),
+ page_to_maddr(image->control_code_page));
+ if ( result < 0 )
+ goto out;
/* Add an empty indirection page. */
image->entry_page = kimage_alloc_control_page(image, 0);
if ( !image->entry_page )
goto out;
+ result = machine_kexec_add_page(image, page_to_maddr(image->entry_page),
+ page_to_maddr(image->entry_page));
+ if ( result < 0 )
+ goto out;
image->head = page_to_maddr(image->entry_page);
if ( addr == destination )
{
page_list_del(page, &image->dest_pages);
- return page;
+ goto found;
}
}
page = NULL;
page_list_add(page, &image->dest_pages);
}
}
+found:
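+ /* Make sure the page is also reachable from the kexec page tables. */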
+ machine_kexec_add_page(image, page_to_maddr(page), page_to_maddr(page));
return page;
}
static int kimage_load_segment(struct kexec_image *image, xen_kexec_segment_t *segment)
{
int result = -ENOMEM;
+ paddr_t addr;
if ( !guest_handle_is_null(segment->buf.h) )
{
}
}
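+ /* Identity-map the segment's destination region so relocate_pages() can
+ * copy into it with the kexec page tables loaded. */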
+ for ( addr = segment->dest_maddr & PAGE_MASK;
+ addr < segment->dest_maddr + segment->dest_size; addr += PAGE_SIZE )
+ {
+ result = machine_kexec_add_page(image, addr, addr);
+ if ( result < 0 )
+ break;
+ }
+
return result;
}
return 0;
}
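+
+/*
+ * Helpers for walking indirection page entries.  Entries are uint32_t in
+ * the compat (32-bit guest) format and unsigned long in the native format;
+ * each holds a machine address with an IND_* flag in its low bits.
+ */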
+kimage_entry_t *kimage_entry_next(kimage_entry_t *entry, bool_t compat)
+{
+ if ( compat )
+ return (kimage_entry_t *)((uint32_t *)entry + 1);
+ return entry + 1;
+}
+
+unsigned long kimage_entry_mfn(kimage_entry_t *entry, bool_t compat)
+{
+ if ( compat )
+ return *(uint32_t *)entry >> PAGE_SHIFT;
+ return *entry >> PAGE_SHIFT;
+}
+
+unsigned long kimage_entry_ind(kimage_entry_t *entry, bool_t compat)
+{
+ if ( compat )
+ return *(uint32_t *)entry & 0xf;
+ return *entry & 0xf;
+}
+
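+/*
+ * Build the image's native-format indirection pages from a guest-supplied
+ * (possibly compat-format) indirection list, copying each IND_SOURCE page
+ * into a Xen-allocated page.
+ */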
+int kimage_build_ind(struct kexec_image *image, unsigned long ind_mfn,
+ bool_t compat)
+{
+ void *page;
+ kimage_entry_t *entry;
+ int ret = 0;
+ paddr_t dest = KIMAGE_NO_DEST;
+
+ page = map_domain_page(ind_mfn);
+ if ( !page )
+ return -ENOMEM;
+
+ /*
+ * Walk the guest-supplied indirection pages, adding entries to
+ * the image's indirection pages.
+ */
+ for ( entry = page; ; )
+ {
+ unsigned long ind;
+ unsigned long mfn;
+
+ ind = kimage_entry_ind(entry, compat);
+ mfn = kimage_entry_mfn(entry, compat);
+
+ switch ( ind )
+ {
+ case IND_DESTINATION:
+ dest = (paddr_t)mfn << PAGE_SHIFT;
+ ret = kimage_set_destination(image, dest);
+ if ( ret < 0 )
+ goto done;
+ break;
+ case IND_INDIRECTION:
+ unmap_domain_page(page);
+ page = map_domain_page(mfn);
+ entry = page;
+ continue;
+ case IND_DONE:
+ kimage_terminate(image);
+ goto done;
+ case IND_SOURCE:
+ {
+ struct page_info *guest_page, *xen_page;
+
+ guest_page = mfn_to_page(mfn);
+ if ( !get_page(guest_page, current->domain) )
+ {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ xen_page = kimage_alloc_page(image, dest);
+ if ( !xen_page )
+ {
+ put_page(guest_page);
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ copy_domain_page(page_to_mfn(xen_page), mfn);
+ put_page(guest_page);
+
+ ret = kimage_add_page(image, page_to_maddr(xen_page));
+ if ( ret < 0 )
+ goto done;
+ dest += PAGE_SIZE;
+ break;
+ }
+ default:
+ ret = -EINVAL;
+ goto done;
+ }
+ entry = kimage_entry_next(entry, compat);
+ }
+done:
+ unmap_domain_page(page);
+ return ret;
+}
+
/*
* Local variables:
* mode: C
FIX_ACPI_BEGIN,
FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
FIX_HPET_BASE,
- FIX_KEXEC_BASE_0,
- FIX_KEXEC_BASE_END = FIX_KEXEC_BASE_0 \
- + ((KEXEC_XEN_NO_PAGES >> 1) * KEXEC_IMAGE_NR) - 1,
FIX_TBOOT_SHARED_BASE,
FIX_MSIX_IO_RESERV_BASE,
FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + FIX_MSIX_MAX_PAGES -1,
--- /dev/null
+#ifndef __X86_MACHINE_KEXEC_H__
+#define __X86_MACHINE_KEXEC_H__
+
+#define KEXEC_RELOC_FLAG_COMPAT 0x1 /* 32-bit image */
+
+#ifndef __ASSEMBLY__
+
+extern void kexec_reloc(unsigned long reloc_code, unsigned long reloc_pt,
+ unsigned long ind_maddr, unsigned long entry_maddr,
+ unsigned long flags);
+
+extern unsigned int kexec_reloc_size;
+
+#endif
+
+#endif /* __X86_MACHINE_KEXEC_H__ */
#include <public/kexec.h>
#include <asm/percpu.h>
#include <xen/elfcore.h>
+#include <xen/kimage.h>
typedef struct xen_kexec_reserve {
unsigned long size;
extern paddr_t crashinfo_maxaddr_bits;
void kexec_early_calculations(void);
-int machine_kexec_load(int type, int slot, xen_kexec_image_t *image);
-void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image);
+int machine_kexec_add_page(struct kexec_image *image, unsigned long vaddr,
+ unsigned long maddr);
+int machine_kexec_load(struct kexec_image *image);
+void machine_kexec_unload(struct kexec_image *image);
void machine_kexec_reserved(xen_kexec_reserve_t *reservation);
-void machine_reboot_kexec(xen_kexec_image_t *image);
-void machine_kexec(xen_kexec_image_t *image);
+void machine_reboot_kexec(struct kexec_image *image);
+void machine_kexec(struct kexec_image *image);
void kexec_crash(void);
void kexec_crash_save_cpu(void);
crash_xen_info_t *kexec_crash_save_info(void);
int machine_kexec_get(xen_kexec_range_t *range);
int machine_kexec_get_xen(xen_kexec_range_t *range);
-void compat_machine_kexec(unsigned long rnk,
- unsigned long indirection_page,
- unsigned long *page_list,
- unsigned long start_address);
-
/* vmcoreinfo stuff */
#define VMCOREINFO_BYTES (4096)
#define VMCOREINFO_NOTE_NAME "VMCOREINFO_XEN"
struct page_info *kimage_alloc_control_page(struct kexec_image *image,
unsigned memflags);
+kimage_entry_t *kimage_entry_next(kimage_entry_t *entry, bool_t compat);
+unsigned long kimage_entry_mfn(kimage_entry_t *entry, bool_t compat);
+unsigned long kimage_entry_ind(kimage_entry_t *entry, bool_t compat);
+int kimage_build_ind(struct kexec_image *image, unsigned long ind_mfn,
+ bool_t compat);
+
#endif /* __ASSEMBLY__ */
#endif /* __XEN_KIMAGE_H__ */