finalize-staged: Ensure /boot automount doesn't expire
authorDan Nicholson <dbn@endlessos.org>
Wed, 16 Feb 2022 22:58:58 +0000 (15:58 -0700)
committerDan Nicholson <dbn@endlessos.org>
Tue, 30 Aug 2022 15:16:39 +0000 (09:16 -0600)
If `/boot` is an automount, then the unit will be stopped as soon as the
automount expires. That's would defeat the purpose of using systemd to
delay finalizing the deployment until shutdown. This is not uncommon as
`systemd-gpt-auto-generator` will create an automount unit for `/boot`
when it's the EFI System Partition and there's no fstab entry.

To ensure that systemd doesn't stop the service early when the `/boot`
automount expires, introduce a new unit that holds `/boot` open until
it's sent `SIGTERM`. This uses a new `--hold` option for
`finalize-staged` that loads but doesn't lock the sysroot. A separate
unit is used since we want the process to remain active throughout the
finalization run in `ExecStop`. That wouldn't work if it was specified
in `ExecStart` in the same unit since it would be killed before the
`ExecStop` action was run.

Fixes: #2543
Makefile-boot.am
src/boot/ostree-finalize-staged-hold.service [new file with mode: 0644]
src/boot/ostree-finalize-staged.service
src/ostree/ot-admin-builtin-finalize-staged.c
tests/inst/src/destructive.rs
tests/kolainst/destructive/boot-automount.sh [new file with mode: 0755]

index e42e5180bfaee647f933156cdd5157059a0831ca..90f9804834c4a41660bf0be07a5b99b1545ebfae 100644 (file)
@@ -41,6 +41,7 @@ systemdsystemunit_DATA = src/boot/ostree-prepare-root.service \
        src/boot/ostree-boot-complete.service \
        src/boot/ostree-finalize-staged.service \
        src/boot/ostree-finalize-staged.path \
+       src/boot/ostree-finalize-staged-hold.service \
        $(NULL)
 systemdtmpfilesdir = $(prefix)/lib/tmpfiles.d
 dist_systemdtmpfiles_DATA = src/boot/ostree-tmpfiles.conf
@@ -70,6 +71,7 @@ EXTRA_DIST += src/boot/dracut/module-setup.sh \
        src/boot/ostree-finalize-staged.path \
        src/boot/ostree-remount.service \
        src/boot/ostree-finalize-staged.service \
+       src/boot/ostree-finalize-staged-hold.service \
        src/boot/grub2/grub2-15_ostree \
        src/boot/grub2/ostree-grub-generator \
        $(NULL)
diff --git a/src/boot/ostree-finalize-staged-hold.service b/src/boot/ostree-finalize-staged-hold.service
new file mode 100644 (file)
index 0000000..85b5d54
--- /dev/null
@@ -0,0 +1,35 @@
+# Copyright (C) 2018 Red Hat, Inc.
+# Copyright (C) 2022 Endless OS Foundation LLC
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see <https://www.gnu.org/licenses/>.
+
+# See https://github.com/ostreedev/ostree/pull/2543 for background.
+[Unit]
+Description=Hold /boot Open for OSTree Finalize Staged Deployment
+Documentation=man:ostree(1)
+ConditionPathExists=/run/ostree-booted
+DefaultDependencies=no
+
+RequiresMountsFor=/sysroot /boot
+After=local-fs.target
+Before=basic.target final.target
+
+[Service]
+Type=exec
+
+# This is explicitly run in the root namespace to ensure an automounted
+# /boot doesn't time out since autofs doesn't handle mount namespaces.
+#
+# https://bugzilla.redhat.com/show_bug.cgi?id=2056090
+ExecStart=+/usr/bin/ostree admin finalize-staged --hold
index 2f28bbb706864cee5aa9fef65e3fbd90c04158ac..63621ce192e681bc6ede99158c9c2dc60616201f 100644 (file)
@@ -29,6 +29,11 @@ Before=basic.target final.target
 After=systemd-journal-flush.service
 Conflicts=final.target
 
+# Start the hold unit and ensure it stays active throughout this
+# service.
+Wants=ostree-finalize-staged-hold.service
+After=ostree-finalize-staged-hold.service
+
 [Service]
 Type=oneshot
 RemainAfterExit=yes
index eedffdded86f324fa5cb6378eaa11a3aa641302e..ed4f20d7592c5bd99b9fe64de250471be847ccdc 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2018 Red Hat, Inc.
+ * Copyright (C) 2022 Endless OS Foundation LLC
  *
  * SPDX-License-Identifier: LGPL-2.0+
  *
@@ -19,8 +20,9 @@
 
 #include "config.h"
 
-#include "config.h"
-
+#include <glib-unix.h>
+#include <sched.h>
+#include <signal.h>
 #include <stdlib.h>
 
 #include "ot-main.h"
 #include "ostree-cmd-private.h"
 #include "ostree.h"
 
+static gboolean opt_hold;
+
 static GOptionEntry options[] = {
+  { "hold", 0, 0, G_OPTION_ARG_NONE, &opt_hold, "Hold /boot open during finalization", NULL },
   { NULL }
 };
 
+static gboolean
+sigterm_cb (gpointer user_data)
+{
+  gboolean *running = user_data;
+  g_print ("Received SIGTERM, exiting\n");
+  *running = FALSE;
+  g_main_context_wakeup (NULL);
+  return G_SOURCE_REMOVE;
+}
+
 /* Called by ostree-finalize-staged.service, and in turn
  * invokes a cmdprivate function inside the shared library.
  */
@@ -50,13 +65,56 @@ ot_admin_builtin_finalize_staged (int argc, char **argv, OstreeCommandInvocation
 
   g_autoptr(GOptionContext) context = g_option_context_new ("");
   g_autoptr(OstreeSysroot) sysroot = NULL;
+
+  /* First parse the args without loading the sysroot to see what options are
+   * set. */
   if (!ostree_admin_option_context_parse (context, options, &argc, &argv,
-                                          OSTREE_ADMIN_BUILTIN_FLAG_SUPERUSER,
+                                          OSTREE_ADMIN_BUILTIN_FLAG_NO_LOAD,
                                           invocation, &sysroot, cancellable, error))
     return FALSE;
 
-  if (!ostree_cmd__private__()->ostree_finalize_staged (sysroot, cancellable, error))
-    return FALSE;
+  if (opt_hold)
+    {
+      /* Load the sysroot unlocked so that a separate namespace isn't
+       * created. */
+      if (!ostree_admin_sysroot_load (sysroot,
+                                      OSTREE_ADMIN_BUILTIN_FLAG_SUPERUSER | OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED,
+                                      cancellable, error))
+        return FALSE;
+
+      /* In case it's an automount, open /boot so that the automount doesn't
+       * expire until before this process exits. If it did expire and got
+       * unmounted, the service would be stopped and the deployment would be
+       * finalized earlier than expected.
+       */
+      int sysroot_fd = ostree_sysroot_get_fd (sysroot);
+      glnx_autofd int boot_fd = -1;
+      g_debug ("Opening /boot directory");
+      if (!glnx_opendirat (sysroot_fd, "boot", TRUE, &boot_fd, error))
+        return FALSE;
+
+      /* We want to keep /boot open until the deployment is finalized during
+       * system shutdown, so block on SIGTERM under the assumption that it will
+       * be received when systemd stops the unit.
+       */
+      gboolean running = TRUE;
+      g_unix_signal_add (SIGTERM, sigterm_cb, &running);
+      g_print ("Waiting for SIGTERM\n");
+      while (running)
+        g_main_context_iteration (NULL, TRUE);
+    }
+  else
+    {
+      /* Load the sysroot with the normal flags and actually finalize the
+       * deployment. */
+      if (!ostree_admin_sysroot_load (sysroot,
+                                      OSTREE_ADMIN_BUILTIN_FLAG_SUPERUSER,
+                                      cancellable, error))
+        return FALSE;
+
+      if (!ostree_cmd__private__()->ostree_finalize_staged (sysroot, cancellable, error))
+        return FALSE;
+    }
 
   return TRUE;
 }
index da188bcb089d8e64b0524b8ae947430f3910178c..d0c9c9dc2baec1c9044c94abc740c4f2fa044883 100644 (file)
@@ -463,6 +463,7 @@ fn impl_transaction_test<M: AsRef<str>>(
             "
             systemctl stop rpm-ostreed
             systemctl stop ostree-finalize-staged
+            systemctl stop ostree-finalize-staged-hold
             ostree reset testrepo:${testref} ${booted_commit}
             rpm-ostree cleanup -pbrm
             ",
@@ -504,7 +505,8 @@ fn impl_transaction_test<M: AsRef<str>>(
                 InterruptStrategy::Force(ForceInterruptStrategy::Kill9) => {
                     bash!(
                         "systemctl kill -s KILL rpm-ostreed || true
-                      systemctl kill -s KILL ostree-finalize-staged || true"
+                      systemctl kill -s KILL ostree-finalize-staged || true
+                      systemctl kill -s KILL ostree-finalize-staged-hold || true"
                     )?;
                     live_strategy = Some(strategy);
                 }
@@ -528,7 +530,8 @@ fn impl_transaction_test<M: AsRef<str>>(
                 InterruptStrategy::Polite(PoliteInterruptStrategy::Stop) => {
                     bash!(
                         "systemctl stop rpm-ostreed || true
-                      systemctl stop ostree-finalize-staged || true"
+                      systemctl stop ostree-finalize-staged || true
+                      systemctl stop ostree-finalize-staged-hold || true"
                     )?;
                     live_strategy = Some(strategy);
                 }
diff --git a/tests/kolainst/destructive/boot-automount.sh b/tests/kolainst/destructive/boot-automount.sh
new file mode 100755 (executable)
index 0000000..d6d1732
--- /dev/null
@@ -0,0 +1,83 @@
+#!/bin/bash
+# https://github.com/ostreedev/ostree/issues/2543
+set -xeuo pipefail
+
+. ${KOLA_EXT_DATA}/libinsttest.sh
+
+case "${AUTOPKGTEST_REBOOT_MARK:-}" in
+  "")
+    # Ensure boot is mount point
+    mountpoint /boot
+
+    # Create an automount unit with an extremely short timeout
+    cat > /etc/systemd/system/boot.automount <<"EOF"
+[Automount]
+Where=/boot
+TimeoutIdleSec=1
+
+[Install]
+WantedBy=local-fs.target
+EOF
+    systemctl daemon-reload
+    systemctl enable boot.automount
+
+    # Unmount /boot, start the automount unit, and ensure the units are
+    # in the correct states.
+    umount /boot
+    systemctl start boot.automount
+    boot_state=$(systemctl show -P ActiveState boot.mount)
+    boot_auto_state=$(systemctl show -P ActiveState boot.automount)
+    assert_streq "${boot_state}" inactive
+    assert_streq "${boot_auto_state}" active
+
+    # Trigger a new staged deployment and check that the relevant units
+    # are enabled.
+    ostree admin deploy --stage --karg-append=somedummykarg=1 "${host_commit}"
+    rpm-ostree status --json
+    deployment_staged=$(rpmostree_query_json '.deployments[0].staged')
+    assert_streq "${deployment_staged}" true
+    test -f /run/ostree/staged-deployment
+    finalize_staged_state=$(systemctl show -P ActiveState ostree-finalize-staged.service)
+    finalize_staged_hold_state=$(systemctl show -P ActiveState ostree-finalize-staged-hold.service)
+    assert_streq "${finalize_staged_state}" active
+    assert_streq "${finalize_staged_hold_state}" active
+
+    # Sleep 1 second to ensure the boot automount idle timeout has
+    # passed and then check that /boot is still mounted.
+    sleep 1
+    boot_state=$(systemctl show -P ActiveState boot.mount)
+    assert_streq "${boot_state}" active
+
+    /tmp/autopkgtest-reboot "2"
+    ;;
+  "2")
+    rpm-ostree status --json
+    deployment_staged=$(rpmostree_query_json '.deployments[0].staged')
+    assert_streq "${deployment_staged}" false
+    test ! -f /run/ostree/staged-deployment
+    assert_file_has_content_literal /proc/cmdline somedummykarg=1
+
+    # Check that the finalize and hold services succeeded in the
+    # previous boot. Dump them to the test log to help debugging.
+    prepare_tmpdir
+    journalctl -b -1 -o short-monotonic \
+        -u ostree-finalize-staged.service \
+        -u ostree-finalize-staged-hold.service \
+        -u boot.mount \
+        -u boot.automount \
+        > logs.txt
+    cat logs.txt
+    assert_file_has_content logs.txt 'ostree-finalize-staged.service: \(Succeeded\|Deactivated successfully\)'
+    assert_file_has_content logs.txt 'ostree-finalize-staged-hold.service: \(Succeeded\|Deactivated successfully\)'
+
+    # Check that the hold service remained active and kept /boot mounted until
+    # the finalize service completed.
+    finalize_stopped=$(journalctl -b -1 -o json -g Stopped -u ostree-finalize-staged.service | tail -n1 | jq -r .__MONOTONIC_TIMESTAMP)
+    hold_stopping=$(journalctl -b -1 -o json -g Stopping -u ostree-finalize-staged-hold.service | tail -n1 | jq -r .__MONOTONIC_TIMESTAMP)
+    hold_stopped=$(journalctl -b -1 -o json -g Stopped -u ostree-finalize-staged-hold.service | tail -n1 | jq -r .__MONOTONIC_TIMESTAMP)
+    boot_unmounting=$(journalctl -b -1 -o json -g Unmounting -u boot.mount | tail -n1 | jq -r .__MONOTONIC_TIMESTAMP)
+    test "${finalize_stopped}" -lt "${hold_stopping}"
+    test "${hold_stopped}" -lt "${boot_unmounting}"
+    ;;
+  *) fatal "Unexpected AUTOPKGTEST_REBOOT_MARK=${AUTOPKGTEST_REBOOT_MARK}" ;;
+esac