Use fcntl F_SETLK, rather than writing our pid into a "lock" file.
That way if we crash we don't leave the lockfile lying about. Callers
now need to keep the fd for our lockfile. (We don't use flock because
we don't want anyone who inherits this fd across fork to end up with a
handle onto the lock.)
While we are here:
* Move the lockfile to /var/run/xen
* De-duplicate the calculation of the pathname
* Compute the buffer size for the pathname so that it will definitely
not overrun (and use the computed value everywhere)
* Fix various error handling bugs
Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
CC: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <unistd.h>
+#include <fcntl.h>
+
#include "xc_private.h"
#include "xenguest.h"
-#define SUSPEND_LOCK_FILE "/var/lib/xen/suspend_evtchn"
-static int lock_suspend_event(xc_interface *xch, int domid)
+#define SUSPEND_LOCK_FILE "/var/run/xen/suspend-evtchn-%d.lock"
+
+/*
+ * locking
+ */
+
+#define ERR(x) do{ \
+ ERROR("Can't " #x " lock file for suspend event channel %s: %s\n", \
+ suspend_file, strerror(errno)); \
+ goto err; \
+}while(0)
+
+#define SUSPEND_FILE_BUFLEN (sizeof(SUSPEND_LOCK_FILE) + 10)
+
+static void get_suspend_file(char buf[SUSPEND_FILE_BUFLEN], int domid)
{
- int fd, rc;
- mode_t mask;
- char buf[128];
- char suspend_file[256];
-
- snprintf(suspend_file, sizeof(suspend_file), "%s_%d_lock.d",
- SUSPEND_LOCK_FILE, domid);
- mask = umask(022);
- fd = open(suspend_file, O_CREAT | O_EXCL | O_RDWR, 0666);
- if (fd < 0)
- {
- ERROR("Can't create lock file for suspend event channel %s\n",
- suspend_file);
- return -EINVAL;
+ snprintf(buf, sizeof(buf), SUSPEND_LOCK_FILE, domid);
+}
+
+static int lock_suspend_event(xc_interface *xch, int domid, int *lockfd)
+{
+ int fd = -1, r;
+ char suspend_file[SUSPEND_FILE_BUFLEN];
+ struct stat ours, theirs;
+ struct flock fl;
+
+ get_suspend_file(suspend_file, domid);
+
+ *lockfd = -1;
+
+ for (;;) {
+ if (fd >= 0)
+ close (fd);
+
+ fd = open(suspend_file, O_CREAT | O_RDWR, 0600);
+ if (fd < 0)
+ ERR("create");
+
+ r = fcntl(fd, F_SETFD, FD_CLOEXEC);
+ if (r)
+ ERR("fcntl F_SETFD FD_CLOEXEC");
+
+ memset(&fl, 0, sizeof(fl));
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_len = 1;
+ r = fcntl(fd, F_SETLK, &fl);
+ if (r)
+ ERR("fcntl F_SETLK");
+
+ r = fstat(fd, &ours);
+ if (r)
+ ERR("fstat");
+
+ r = stat(suspend_file, &theirs);
+ if (r) {
+ if (errno == ENOENT)
+ /* try again */
+ continue;
+ ERR("stat");
+ }
+
+ if (ours.st_ino != theirs.st_ino)
+ /* someone else must have removed it while we were locking it */
+ continue;
+
+ break;
}
- umask(mask);
- snprintf(buf, sizeof(buf), "%10ld", (long)getpid());
- rc = write_exact(fd, buf, strlen(buf));
- close(fd);
+ *lockfd = fd;
+ return 0;
- return rc;
+ err:
+ if (fd >= 0)
+ close(fd);
+
+ return -1;
}
-static int unlock_suspend_event(xc_interface *xch, int domid)
+static int unlock_suspend_event(xc_interface *xch, int domid, int *lockfd)
{
- int fd, pid, n;
- char buf[128];
- char suspend_file[256];
+ int r;
+ char suspend_file[SUSPEND_FILE_BUFLEN];
- snprintf(suspend_file, sizeof(suspend_file), "%s_%d_lock.d",
- SUSPEND_LOCK_FILE, domid);
- fd = open(suspend_file, O_RDWR);
+ if (*lockfd < 0)
+ return 0;
- if (fd < 0)
- return -EINVAL;
+ get_suspend_file(suspend_file, domid);
- n = read(fd, buf, 127);
+ r = unlink(suspend_file);
+ if (r)
+ ERR("unlink");
- close(fd);
+ r = close(*lockfd);
+ *lockfd = -1;
+ if (r)
+ ERR("close");
- if (n > 0)
- {
- sscanf(buf, "%d", &pid);
- /* We are the owner, so we can simply delete the file */
- if (pid == getpid())
- {
- unlink(suspend_file);
- return 0;
- }
- }
+ err:
+ if (*lockfd >= 0)
+ close(*lockfd);
- return -EPERM;
+ return -1;
}
int xc_await_suspend(xc_interface *xch, xc_evtchn *xce, int suspend_evtchn)
return 0;
}
-int xc_suspend_evtchn_release(xc_interface *xch, xc_evtchn *xce, int domid, int suspend_evtchn)
+/* Internal callers are allowed to call this with suspend_evtchn<0
+ * but *lockfd>0. */
+int xc_suspend_evtchn_release(xc_interface *xch, xc_evtchn *xce,
+ int domid, int suspend_evtchn, int *lockfd)
{
if (suspend_evtchn >= 0)
xc_evtchn_unbind(xce, suspend_evtchn);
- return unlock_suspend_event(xch, domid);
+ return unlock_suspend_event(xch, domid, lockfd);
}
-int xc_suspend_evtchn_init_sane(xc_interface *xch, xc_evtchn *xce, int domid, int port)
+int xc_suspend_evtchn_init_sane(xc_interface *xch, xc_evtchn *xce,
+ int domid, int port, int *lockfd)
{
int rc, suspend_evtchn = -1;
- if (lock_suspend_event(xch, domid))
- return -EINVAL;
+ if (lock_suspend_event(xch, domid, lockfd)) {
+ errno = EINVAL;
+ goto cleanup;
+ }
suspend_evtchn = xc_evtchn_bind_interdomain(xce, domid, port);
if (suspend_evtchn < 0) {
return suspend_evtchn;
cleanup:
- if (suspend_evtchn != -1)
- xc_suspend_evtchn_release(xch, xce, domid, suspend_evtchn);
+ xc_suspend_evtchn_release(xch, xce, domid, suspend_evtchn, lockfd);
return -1;
}
-int xc_suspend_evtchn_init_exclusive(xc_interface *xch, xc_evtchn *xce, int domid, int port)
+int xc_suspend_evtchn_init_exclusive(xc_interface *xch, xc_evtchn *xce,
+ int domid, int port, int *lockfd)
{
int suspend_evtchn;
- suspend_evtchn = xc_suspend_evtchn_init_sane(xch, xce, domid, port);
+ suspend_evtchn = xc_suspend_evtchn_init_sane(xch, xce, domid, port, lockfd);
if (suspend_evtchn < 0)
return suspend_evtchn;
int target,
const char *image_name);
-int xc_suspend_evtchn_release(xc_interface *xch, xc_evtchn *xce, int domid, int suspend_evtchn);
+/*
+ * Sets *lockfd to -1.
+ * Has deallocated everything even on error.
+ */
+int xc_suspend_evtchn_release(xc_interface *xch, xc_evtchn *xce, int domid, int suspend_evtchn, int *lockfd);
/**
* This function eats the initial notification.
* xce must not be used for anything else
+ * See xc_suspend_evtchn_init_sane re lockfd.
*/
-int xc_suspend_evtchn_init_exclusive(xc_interface *xch, xc_evtchn *xce, int domid, int port);
+int xc_suspend_evtchn_init_exclusive(xc_interface *xch, xc_evtchn *xce,
+ int domid, int port, int *lockfd);
/* xce must not be used for anything else */
int xc_await_suspend(xc_interface *xch, xc_evtchn *xce, int suspend_evtchn);
/**
* The port will be signaled immediately after this call
* The caller should check the domain status and look for the next event
+ * On success, *lockfd will be set to >=0 and *lockfd must be preserved
+ * and fed to xc_suspend_evtchn_release. (On error *lockfd is
+ * undefined and xc_suspend_evtchn_release is not allowed.)
*/
-int xc_suspend_evtchn_init_sane(xc_interface *xch, xc_evtchn *xce, int domid, int port);
+int xc_suspend_evtchn_init_sane(xc_interface *xch, xc_evtchn *xce,
+ int domid, int port, int *lockfd);
int xc_get_bit_size(xc_interface *xch,
const char *image_name, const char *cmdline,
| (dss->hvm ? XCFLAGS_HVM : 0);
dss->suspend_eventchn = -1;
+ dss->guest_evtchn_lockfd = -1;
dss->guest_responded = 0;
dss->dm_savefile = libxl__device_model_savefile(gc, domid);
if (port >= 0) {
dss->suspend_eventchn =
- xc_suspend_evtchn_init_exclusive(CTX->xch, dss->xce, dss->domid, port);
+ xc_suspend_evtchn_init_exclusive(CTX->xch, dss->xce,
+ dss->domid, port, &dss->guest_evtchn_lockfd);
if (dss->suspend_eventchn < 0)
LOG(WARN, "Suspend event channel initialization failed");
if (dss->suspend_eventchn > 0)
xc_suspend_evtchn_release(CTX->xch, dss->xce, domid,
- dss->suspend_eventchn);
+ dss->suspend_eventchn, &dss->guest_evtchn_lockfd);
if (dss->xce != NULL)
xc_evtchn_close(dss->xce);
/* private */
xc_evtchn *xce; /* event channel handle */
int suspend_eventchn;
+ int guest_evtchn_lockfd;
int hvm;
int xcflags;
int guest_responded;
extern int xs_suspend_evtchn_port(int domid);
-static int suspend_guest(xc_interface *xch, xc_evtchn *xce, int domid, int *evtchn)
+static int suspend_guest(xc_interface *xch, xc_evtchn *xce, int domid,
+ int *evtchn, int *lockfd)
{
int port, rc, suspend_evtchn = -1;
+ *lockfd = -1;
+
if (!evtchn)
return -1;
fprintf(stderr, "DOM%d: No suspend port, try live migration\n", domid);
goto failed;
}
- suspend_evtchn = xc_suspend_evtchn_init_exclusive(xch, xce, domid, port);
+ suspend_evtchn = xc_suspend_evtchn_init_exclusive(xch, xce, domid,
+ port, lockfd);
if (suspend_evtchn < 0)
{
fprintf(stderr, "Suspend evtchn initialization failed\n");
failed:
if (suspend_evtchn != -1)
- xc_suspend_evtchn_release(xch, xce, domid, suspend_evtchn);
+ xc_suspend_evtchn_release(xch, xce, domid,
+ suspend_evtchn, lockfd);
return -1;
}
}
else if (status & PG_OFFLINE_OWNED)
{
- int result, suspend_evtchn = -1;
+ int result, suspend_evtchn = -1, suspend_lockfd = -1;
xc_evtchn *xce;
xce = xc_evtchn_open(NULL, 0);
}
domid = status >> PG_OFFLINE_OWNER_SHIFT;
- if (suspend_guest(xch, xce, domid, &suspend_evtchn))
+ if (suspend_guest(xch, xce, domid,
+ &suspend_evtchn, &suspend_lockfd))
{
fprintf(stderr, "Failed to suspend guest %d for"
" mfn %lx\n", domid, mfn);
mfn, domid);
}
xc_domain_resume(xch, domid, 1);
- xc_suspend_evtchn_release(xch, xce, domid, suspend_evtchn);
+ xc_suspend_evtchn_release(xch, xce, domid,
+ suspend_evtchn, &suspend_lockfd);
xc_evtchn_close(xce);
}
break;
checkpoint_domtype domtype;
int fd;
- int suspend_evtchn;
+ int suspend_evtchn, suspend_lockfd;
char* errstr;
s->fd = -1;
s->suspend_evtchn = -1;
+ s->suspend_lockfd = -1;
s->errstr = NULL;
return -1;
}
- s->suspend_evtchn = xc_suspend_evtchn_init_exclusive(s->xch, s->xce, s->domid, port);
+ s->suspend_evtchn = xc_suspend_evtchn_init_exclusive(s->xch, s->xce,
+ s->domid, port, &s->suspend_lockfd);
if (s->suspend_evtchn < 0) {
s->errstr = "failed to bind suspend event channel";
return -1;
{
/* TODO: teach xen to clean up if port is unbound */
if (s->xce != NULL && s->suspend_evtchn >= 0) {
- xc_suspend_evtchn_release(s->xch, s->xce, s->domid, s->suspend_evtchn);
+ xc_suspend_evtchn_release(s->xch, s->xce, s->domid,
+ s->suspend_evtchn, &s->suspend_lockfd);
s->suspend_evtchn = -1;
}
}
main(int argc, char **argv)
{
unsigned int maxit, max_f, lflags;
- int io_fd, ret, port;
+ int io_fd, ret, port, suspend_lockfd = -1;
struct save_callbacks callbacks;
xentoollog_level lvl;
xentoollog_logger *l;
else
{
si.suspend_evtchn =
- xc_suspend_evtchn_init_exclusive(si.xch, si.xce, si.domid, port);
+ xc_suspend_evtchn_init_exclusive(si.xch, si.xce, si.domid,
+ port, &suspend_lockfd);
if (si.suspend_evtchn < 0)
warnx("suspend event channel initialization failed, "
&callbacks, !!(si.flags & XCFLAGS_HVM), 0);
if (si.suspend_evtchn > 0)
- xc_suspend_evtchn_release(si.xch, si.xce, si.domid, si.suspend_evtchn);
+ xc_suspend_evtchn_release(si.xch, si.xce, si.domid,
+ si.suspend_evtchn, &suspend_lockfd);
if (si.xce > 0)
xc_evtchn_close(si.xce);