x86/livepatch: Prevent patching with active waitqueues
authorAndrew Cooper <andrew.cooper3@citrix.com>
Mon, 25 Nov 2019 15:25:18 +0000 (16:25 +0100)
committerJan Beulich <jbeulich@suse.com>
Mon, 25 Nov 2019 15:25:18 +0000 (16:25 +0100)
The safety of livepatching depends on every stack having been unwound, but
there is one corner case where this is not true.  The Sharing/Paging/Monitor
infrastructure may use waitqueues, which copy the stack frame sideways and
longjmp() to a different vcpu.

This case is rare, and can be worked around by pausing the offending
domain(s), waiting for their rings to drain, then performing a livepatch.

In the case that there is an active waitqueue, fail the livepatch attempt with
-EBUSY, which is preforable to the fireworks which occur from trying to unwind
the old stack frame at a later point.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com>
master commit: ca4cd3668237d50a0b33b48e7de7f93d9475120d
master date: 2019-11-22 17:05:43 +0000

xen/arch/arm/livepatch.c
xen/arch/x86/livepatch.c
xen/common/livepatch.c
xen/include/xen/livepatch.h

index 279d52cc6cd78125f57a007d6a711a48419a25a3..7184535c85e93e2b5a198da6f57aeec6becbc7e0 100644 (file)
 
 void *vmap_of_xen_text;
 
+int arch_livepatch_safety_check(void)
+{
+    return 0;
+}
+
 int arch_livepatch_quiesce(void)
 {
     mfn_t text_mfn;
index 406eb910cc8fd3bdb1064348890d2bf6936746d3..fd98d2bf03998c07a00fc109f22f2d7a1e331866 100644 (file)
 #include <xen/vmap.h>
 #include <xen/livepatch_elf.h>
 #include <xen/livepatch.h>
+#include <xen/sched.h>
 
 #include <asm/nmi.h>
 #include <asm/livepatch.h>
 
+static bool has_active_waitqueue(const struct vm_event_domain *ved)
+{
+    /* ved may be xzalloc()'d without INIT_LIST_HEAD() yet. */
+    return (ved && !list_head_is_null(&ved->wq.list) &&
+            !list_empty(&ved->wq.list));
+}
+
+/*
+ * x86's implementation of waitqueue violates the livepatching safey principle
+ * of having unwound every CPUs stack before modifying live content.
+ *
+ * Search through every domain and check that no vCPUs have an active
+ * waitqueue.
+ */
+int arch_livepatch_safety_check(void)
+{
+    struct domain *d;
+
+    for_each_domain ( d )
+    {
+        if ( has_active_waitqueue(d->vm_event_share) )
+            goto fail;
+        if ( has_active_waitqueue(d->vm_event_paging) )
+            goto fail;
+        if ( has_active_waitqueue(d->vm_event_monitor) )
+            goto fail;
+    }
+
+    return 0;
+
+ fail:
+    printk(XENLOG_ERR LIVEPATCH "%pd found with active waitqueue\n", d);
+    return -EBUSY;
+}
+
 int arch_livepatch_quiesce(void)
 {
     /* Disable WP to allow changes to read-only pages. */
index d6eaae6d3be4d17ec38dd787779b95c7bff8176c..d69fdb36662d67b8cdbe5d1d2b0149b4c0c7fce1 100644 (file)
@@ -1060,6 +1060,14 @@ static int apply_payload(struct payload *data)
     unsigned int i;
     int rc;
 
+    rc = arch_livepatch_safety_check();
+    if ( rc )
+    {
+        printk(XENLOG_ERR LIVEPATCH "%s: Safety checks failed: %d\n",
+               data->name, rc);
+        return rc;
+    }
+
     printk(XENLOG_INFO LIVEPATCH "%s: Applying %u functions\n",
             data->name, data->nfuncs);
 
index 98ec01216bc3634a82f029b177011b0cc8fbfcf5..0a72ccc3caa44b699ae387f35982fe2b6530478e 100644 (file)
@@ -103,6 +103,7 @@ static inline int livepatch_verify_distance(const struct livepatch_func *func)
  * These functions are called around the critical region patching live code,
  * for an architecture to take make appropratie global state adjustments.
  */
+int arch_livepatch_safety_check(void);
 int arch_livepatch_quiesce(void);
 void arch_livepatch_revive(void);