[HVM][LINUX][TOOLS] Make xm {shutdown|reboot} do something sensible for HVM
author Steven Smith <ssmith@xensource.com>
Tue, 31 Oct 2006 09:54:49 +0000 (09:54 +0000)
committer Steven Smith <ssmith@xensource.com>
Tue, 31 Oct 2006 09:54:49 +0000 (09:54 +0000)
domains with PV drivers loaded.  This patch creates a new PV-on-HVM
module, reboot.ko, which, when loaded, creates control/reboot_module
in the store.  The tools notice this, and disable the watch which would
normally destroy HVM domains which are the target of an xm shutdown
command, allowing the reboot module to shut the domain down cleanly.

Signed-off-by: Tetsu Yamamoto <yamamoto.tetsu@jp.fujitsu.com>
(Checkin comments by Steven Smith <sos22@cam.ac.uk>)

linux-2.6-xen-sparse/drivers/xen/core/Makefile
linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c [new file with mode: 0644]
linux-2.6-xen-sparse/drivers/xen/core/reboot.c
linux-2.6-xen-sparse/include/xen/reboot.h [new file with mode: 0644]
tools/python/xen/xend/image.py
unmodified_drivers/linux-2.6/Makefile
unmodified_drivers/linux-2.6/mkbuildtree [changed mode: 0644->0755]
unmodified_drivers/linux-2.6/util/Kbuild [new file with mode: 0644]

index c1b0c1bd51f89be57b29e45f396ff0f56c1959fa..6154454339bf62ef7160b19f02fb75260bc18b68 100644 (file)
@@ -9,5 +9,5 @@ obj-$(CONFIG_SYSFS)             += hypervisor_sysfs.o
 obj-$(CONFIG_HOTPLUG_CPU)      += cpu_hotplug.o
 obj-$(CONFIG_XEN_SYSFS)                += xen_sysfs.o
 obj-$(CONFIG_XEN_SKBUFF)       += skbuff.o
-obj-$(CONFIG_XEN_REBOOT)       += reboot.o
+obj-$(CONFIG_XEN_REBOOT)       += reboot.o machine_reboot.o
 obj-$(CONFIG_XEN_SMPBOOT)      += smpboot.o
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c b/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c
new file mode 100644 (file)
index 0000000..c14453e
--- /dev/null
@@ -0,0 +1,206 @@
+#define __KERNEL_SYSCALLS__
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/unistd.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/sysrq.h>
+#include <linux/stringify.h>
+#include <asm/irq.h>
+#include <asm/mmu_context.h>
+#include <xen/evtchn.h>
+#include <asm/hypervisor.h>
+#include <xen/interface/dom0_ops.h>
+#include <xen/xenbus.h>
+#include <linux/cpu.h>
+#include <linux/kthread.h>
+#include <xen/gnttab.h>
+#include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
+#include <xen/reboot.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+
+void machine_emergency_restart(void)
+{
+       /* We really want to get pending console data out before we die. */
+       xencons_force_flush();
+       HYPERVISOR_shutdown(SHUTDOWN_reboot);
+}
+
+void machine_restart(char * __unused)
+{
+       machine_emergency_restart();
+}
+
+void machine_halt(void)
+{
+       machine_power_off();
+}
+
+void machine_power_off(void)
+{
+       /* We really want to get pending console data out before we die. */
+       xencons_force_flush();
+       if (pm_power_off)
+               pm_power_off();
+       HYPERVISOR_shutdown(SHUTDOWN_poweroff);
+}
+
+int reboot_thru_bios = 0;      /* for dmi_scan.c */
+EXPORT_SYMBOL(machine_restart);
+EXPORT_SYMBOL(machine_halt);
+EXPORT_SYMBOL(machine_power_off);
+
+#endif /* defined(__i386__) || defined(__x86_64__) */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/* Ensure we run on the idle task page tables so that we will
+   switch page tables before running user space. This is needed
+   on architectures with separate kernel and user page tables
+   because the user page table pointer is not saved/restored. */
+static void switch_idle_mm(void)
+{
+       struct mm_struct *mm = current->active_mm;
+
+       if (mm == &init_mm)
+               return;
+
+       atomic_inc(&init_mm.mm_count);
+       switch_mm(mm, &init_mm, current);
+       current->active_mm = &init_mm;
+       mmdrop(mm);
+}
+
+static void pre_suspend(void)
+{
+       HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+       clear_fixmap(FIX_SHARED_INFO);
+
+       xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+       xen_start_info->console.domU.mfn =
+               mfn_to_pfn(xen_start_info->console.domU.mfn);
+}
+
+static void post_suspend(void)
+{
+       int i, j, k, fpp;
+       extern unsigned long max_pfn;
+       extern unsigned long *pfn_to_mfn_frame_list_list;
+       extern unsigned long *pfn_to_mfn_frame_list[];
+
+       set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+
+       HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+
+       memset(empty_zero_page, 0, PAGE_SIZE);
+
+       HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+               virt_to_mfn(pfn_to_mfn_frame_list_list);
+
+       fpp = PAGE_SIZE/sizeof(unsigned long);
+       for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
+               if ((j % fpp) == 0) {
+                       k++;
+                       pfn_to_mfn_frame_list_list[k] =
+                               virt_to_mfn(pfn_to_mfn_frame_list[k]);
+                       j = 0;
+               }
+               pfn_to_mfn_frame_list[k][j] =
+                       virt_to_mfn(&phys_to_machine_mapping[i]);
+       }
+       HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
+}
+
+#else /* !(defined(__i386__) || defined(__x86_64__)) */
+
+#define switch_idle_mm()       ((void)0)
+#define mm_pin_all()           ((void)0)
+#define pre_suspend()          ((void)0)
+#define post_suspend()         ((void)0)
+
+#endif
+
+int __do_suspend(void *ignore)
+{
+       int err;
+
+       extern void time_resume(void);
+
+       BUG_ON(smp_processor_id() != 0);
+       BUG_ON(in_interrupt());
+
+#if defined(__i386__) || defined(__x86_64__)
+       if (xen_feature(XENFEAT_auto_translated_physmap)) {
+               printk(KERN_WARNING "Cannot suspend in "
+                      "auto_translated_physmap mode.\n");
+               return -EOPNOTSUPP;
+       }
+#endif
+
+       err = smp_suspend();
+       if (err)
+               return err;
+
+       xenbus_suspend();
+
+       preempt_disable();
+
+       mm_pin_all();
+       local_irq_disable();
+       preempt_enable();
+
+       gnttab_suspend();
+
+       pre_suspend();
+
+       /*
+        * We'll stop somewhere inside this hypercall. When it returns,
+        * we'll start resuming after the restore.
+        */
+       HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+
+       shutting_down = SHUTDOWN_INVALID;
+
+       post_suspend();
+
+       gnttab_resume();
+
+       irq_resume();
+
+       time_resume();
+
+       switch_idle_mm();
+
+       local_irq_enable();
+
+       xencons_resume();
+
+       xenbus_resume();
+
+       smp_resume();
+
+       return err;
+}
+
+int kthread_create_on_cpu(int (*f)(void *arg),
+                                void *arg,
+                                const char *name,
+                                int cpu)
+{
+       struct task_struct *p;
+       p = kthread_create(f, arg, name);
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+       kthread_bind(p, cpu);
+       wake_up_process(p);
+       return 0;
+}
index 34c3930961aea72df93a8da4747ddfc1de61ae69..9107b87b2a38e0386cbe7684304f27a4dca5b154 100644 (file)
 #define __KERNEL_SYSCALLS__
 #include <linux/version.h>
 #include <linux/kernel.h>
-#include <linux/mm.h>
 #include <linux/unistd.h>
 #include <linux/module.h>
 #include <linux/reboot.h>
 #include <linux/sysrq.h>
-#include <linux/stringify.h>
-#include <asm/irq.h>
-#include <asm/mmu_context.h>
-#include <xen/evtchn.h>
 #include <asm/hypervisor.h>
-#include <xen/interface/dom0_ops.h>
 #include <xen/xenbus.h>
-#include <linux/cpu.h>
 #include <linux/kthread.h>
-#include <xen/gnttab.h>
-#include <xen/xencons.h>
-#include <xen/cpu_hotplug.h>
-
-extern void ctrl_alt_del(void);
-
-#define SHUTDOWN_INVALID  -1
-#define SHUTDOWN_POWEROFF  0
-#define SHUTDOWN_SUSPEND   2
-/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
- * report a crash, not be instructed to crash!
- * HALT is the same as POWEROFF, as far as we're concerned.  The tools use
- * the distinction when we return the reason code to them.
- */
-#define SHUTDOWN_HALT      4
-
-#if defined(__i386__) || defined(__x86_64__)
-
-/*
- * Power off function, if any
- */
-void (*pm_power_off)(void);
-EXPORT_SYMBOL(pm_power_off);
-
-void machine_emergency_restart(void)
-{
-       /* We really want to get pending console data out before we die. */
-       xencons_force_flush();
-       HYPERVISOR_shutdown(SHUTDOWN_reboot);
-}
-
-void machine_restart(char * __unused)
-{
-       machine_emergency_restart();
-}
+#include <xen/reboot.h>
 
-void machine_halt(void)
-{
-       machine_power_off();
-}
-
-void machine_power_off(void)
-{
-       /* We really want to get pending console data out before we die. */
-       xencons_force_flush();
-       if (pm_power_off)
-               pm_power_off();
-       HYPERVISOR_shutdown(SHUTDOWN_poweroff);
-}
+MODULE_LICENSE("Dual BSD/GPL");
 
-int reboot_thru_bios = 0;      /* for dmi_scan.c */
-EXPORT_SYMBOL(machine_restart);
-EXPORT_SYMBOL(machine_halt);
-EXPORT_SYMBOL(machine_power_off);
-
-#endif /* defined(__i386__) || defined(__x86_64__) */
-
-/******************************************************************************
- * Stop/pickle callback handling.
- */
-
-/* Ignore multiple shutdown requests. */
-static int shutting_down = SHUTDOWN_INVALID;
 static void __shutdown_handler(void *unused);
 static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
 
-#if defined(__i386__) || defined(__x86_64__)
-
-/* Ensure we run on the idle task page tables so that we will
-   switch page tables before running user space. This is needed
-   on architectures with separate kernel and user page tables
-   because the user page table pointer is not saved/restored. */
-static void switch_idle_mm(void)
-{
-       struct mm_struct *mm = current->active_mm;
-
-       if (mm == &init_mm)
-               return;
-
-       atomic_inc(&init_mm.mm_count);
-       switch_mm(mm, &init_mm, current);
-       current->active_mm = &init_mm;
-       mmdrop(mm);
-}
-
-static void pre_suspend(void)
-{
-       HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
-       clear_fixmap(FIX_SHARED_INFO);
-
-       xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
-       xen_start_info->console.domU.mfn =
-               mfn_to_pfn(xen_start_info->console.domU.mfn);
-}
-
-static void post_suspend(void)
-{
-       int i, j, k, fpp;
-       extern unsigned long max_pfn;
-       extern unsigned long *pfn_to_mfn_frame_list_list;
-       extern unsigned long *pfn_to_mfn_frame_list[];
-
-       set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
-
-       HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
-
-       memset(empty_zero_page, 0, PAGE_SIZE);
-
-       HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
-               virt_to_mfn(pfn_to_mfn_frame_list_list);
-
-       fpp = PAGE_SIZE/sizeof(unsigned long);
-       for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
-               if ((j % fpp) == 0) {
-                       k++;
-                       pfn_to_mfn_frame_list_list[k] =
-                               virt_to_mfn(pfn_to_mfn_frame_list[k]);
-                       j = 0;
-               }
-               pfn_to_mfn_frame_list[k][j] =
-                       virt_to_mfn(&phys_to_machine_mapping[i]);
-       }
-       HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
-}
-
-#else /* !(defined(__i386__) || defined(__x86_64__)) */
-
-#define switch_idle_mm()       ((void)0)
-#define mm_pin_all()           ((void)0)
-#define pre_suspend()          ((void)0)
-#define post_suspend()         ((void)0)
-
-#endif
-
-static int __do_suspend(void *ignore)
-{
-       int err;
-
-       extern void time_resume(void);
-
-       BUG_ON(smp_processor_id() != 0);
-       BUG_ON(in_interrupt());
-
-#if defined(__i386__) || defined(__x86_64__)
-       if (xen_feature(XENFEAT_auto_translated_physmap)) {
-               printk(KERN_WARNING "Cannot suspend in "
-                      "auto_translated_physmap mode.\n");
-               return -EOPNOTSUPP;
-       }
-#endif
-
-       err = smp_suspend();
-       if (err)
-               return err;
-
-       xenbus_suspend();
-
-       preempt_disable();
-
-       mm_pin_all();
-       local_irq_disable();
-       preempt_enable();
-
-       gnttab_suspend();
-
-       pre_suspend();
-
-       /*
-        * We'll stop somewhere inside this hypercall. When it returns,
-        * we'll start resuming after the restore.
-        */
-       HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
-
-       shutting_down = SHUTDOWN_INVALID;
-
-       post_suspend();
-
-       gnttab_resume();
-
-       irq_resume();
-
-       time_resume();
-
-       switch_idle_mm();
-
-       local_irq_enable();
-
-       xencons_resume();
-
-       xenbus_resume();
-
-       smp_resume();
-
-       return err;
-}
-
 static int shutdown_process(void *__unused)
 {
        static char *envp[] = { "HOME=/", "TERM=linux",
@@ -222,11 +26,13 @@ static int shutdown_process(void *__unused)
 
        if ((shutting_down == SHUTDOWN_POWEROFF) ||
            (shutting_down == SHUTDOWN_HALT)) {
-               if (execve("/sbin/poweroff", poweroff_argv, envp) < 0) {
+               if (call_usermodehelper_keys("/sbin/poweroff", poweroff_argv, envp, NULL, 0) < 0) {
+#ifdef CONFIG_XEN
                        sys_reboot(LINUX_REBOOT_MAGIC1,
                                   LINUX_REBOOT_MAGIC2,
                                   LINUX_REBOOT_CMD_POWER_OFF,
                                   NULL);
+#endif /* CONFIG_XEN */
                }
        }
 
@@ -235,29 +41,21 @@ static int shutdown_process(void *__unused)
        return 0;
 }
 
-static int kthread_create_on_cpu(int (*f)(void *arg),
-                                void *arg,
-                                const char *name,
-                                int cpu)
-{
-       struct task_struct *p;
-       p = kthread_create(f, arg, name);
-       if (IS_ERR(p))
-               return PTR_ERR(p);
-       kthread_bind(p, cpu);
-       wake_up_process(p);
-       return 0;
-}
 
 static void __shutdown_handler(void *unused)
 {
        int err;
 
+#ifdef CONFIG_XEN
        if (shutting_down != SHUTDOWN_SUSPEND)
                err = kernel_thread(shutdown_process, NULL,
                                    CLONE_FS | CLONE_FILES);
        else
                err = kthread_create_on_cpu(__do_suspend, NULL, "suspend", 0);
+#else /* !CONFIG_XEN */
+               err = kernel_thread(shutdown_process, NULL,
+                                   CLONE_FS | CLONE_FILES);
+#endif /* !CONFIG_XEN */
 
        if (err < 0) {
                printk(KERN_WARNING "Error creating shutdown process (%d): "
@@ -273,6 +71,8 @@ static void shutdown_handler(struct xenbus_watch *watch,
        struct xenbus_transaction xbt;
        int err;
 
+       int cad_pid = 1; 
+
        if (shutting_down != SHUTDOWN_INVALID)
                return;
 
@@ -298,7 +98,7 @@ static void shutdown_handler(struct xenbus_watch *watch,
        if (strcmp(str, "poweroff") == 0)
                shutting_down = SHUTDOWN_POWEROFF;
        else if (strcmp(str, "reboot") == 0)
-               ctrl_alt_del();
+               kill_proc(cad_pid, SIGINT, 1);
        else if (strcmp(str, "suspend") == 0)
                shutting_down = SHUTDOWN_SUSPEND;
        else if (strcmp(str, "halt") == 0)
@@ -378,6 +178,11 @@ static int __init setup_shutdown_event(void)
                .notifier_call = setup_shutdown_watcher
        };
        register_xenstore_notifier(&xenstore_notifier);
+
+       if (!is_initial_xendomain()) {
+               xenbus_write(XBT_NIL, "control", "reboot_module", "installed");
+       }
+
        return 0;
 }
 
diff --git a/linux-2.6-xen-sparse/include/xen/reboot.h b/linux-2.6-xen-sparse/include/xen/reboot.h
new file mode 100644 (file)
index 0000000..9763723
--- /dev/null
@@ -0,0 +1,19 @@
+#define SHUTDOWN_INVALID  -1
+#define SHUTDOWN_POWEROFF  0
+#define SHUTDOWN_SUSPEND   2
+/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
+ * report a crash, not be instructed to crash!
+ * HALT is the same as POWEROFF, as far as we're concerned.  The tools use
+ * the distinction when we return the reason code to them.
+ */
+#define SHUTDOWN_HALT      4
+
+/******************************************************************************
+ * Stop/pickle callback handling.
+ */
+
+/* Ignore multiple shutdown requests. */
+static int shutting_down = SHUTDOWN_INVALID;
+
+int kthread_create_on_cpu(int (*f)(void *), void *, const char *, int);
+int __do_suspend(void *);
index 95099e765536565896fe5e906590c2957c519a9e..0afe4355c377a96e56a2904a7f775c024ed8058d 100644 (file)
@@ -282,6 +282,7 @@ class HVMImageHandler(ImageHandler):
         log.debug("apic           = %d", self.apic)
 
         self.register_shutdown_watch()
+        self.register_reboot_module_watch()
 
         return xc.hvm_build(dom            = self.vm.getDomid(),
                             image          = self.kernel,
@@ -416,6 +417,7 @@ class HVMImageHandler(ImageHandler):
 
     def destroy(self):
         self.unregister_shutdown_watch();
+        self.unregister_reboot_module_watch();
         if not self.pid:
             return
         os.kill(self.pid, signal.SIGKILL)
@@ -458,6 +460,39 @@ class HVMImageHandler(ImageHandler):
 
         return 1 # Keep watching
 
+    def register_reboot_module_watch(self):
+        """ add xen store watch on control/reboot_module """
+        self.rebootModuleWatch = xswatch(self.vm.dompath + "/control/reboot_module", \
+                                    self.hvm_reboot_module)
+        log.debug("hvm reboot module watch registered")
+
+    def unregister_reboot_module_watch(self):
+        """Remove the watch on the control/reboot_module, if any. Nothrow
+        guarantee."""
+
+        try:
+            if self.rebootModuleWatch:
+                self.rebootModuleWatch.unwatch()
+        except:
+            log.exception("Unwatching hvm reboot module watch failed.")
+        self.rebootModuleWatch = None
+        log.debug("hvm reboot module watch unregistered")
+
+    def hvm_reboot_module(self, _):
+        """ watch call back on node control/reboot_module,
+            if node changed, this function will be called
+        """
+        xd = xen.xend.XendDomain.instance()
+        vm = xd.domain_lookup( self.vm.getDomid() )
+
+        reboot_module_status = vm.readDom('control/reboot_module')
+        log.debug("hvm_reboot_module fired, module status=%s", reboot_module_status)
+        if reboot_module_status == 'installed':
+            self.unregister_shutdown_watch()
+
+        return 1 # Keep watching
+
+
 class IA64_HVM_ImageHandler(HVMImageHandler):
 
     ostype = "hvm"
index 95d558f77b1d9dbcec21d9edffa22ea3ca3fc0ed..119016f53162bcfd89629a3745d7127b4e2ecbe7 100644 (file)
@@ -4,3 +4,4 @@ obj-m += platform-pci/
 obj-m += xenbus/
 obj-m += blkfront/
 obj-m += netfront/
+obj-m += util/
old mode 100644 (file)
new mode 100755 (executable)
index ce0a142..5b6b864
@@ -22,6 +22,7 @@ done
 ln -sf ${XL}/drivers/xen/core/gnttab.c platform-pci
 ln -sf ${XL}/drivers/xen/core/features.c platform-pci
 ln -sf ${XL}/drivers/xen/core/xen_proc.c xenbus
+ln -sf ${XL}/drivers/xen/core/reboot.c util
 
 mkdir -p include
 mkdir -p include/xen
diff --git a/unmodified_drivers/linux-2.6/util/Kbuild b/unmodified_drivers/linux-2.6/util/Kbuild
new file mode 100644 (file)
index 0000000..35495d8
--- /dev/null
@@ -0,0 +1,3 @@
+include $(M)/overrides.mk
+
+obj-m := reboot.o