--- /dev/null
+//! Test that interrupting an upgrade is safe.
+//!
+//! This test builds on coreos-assembler's "external tests":
+//! https://github.com/coreos/coreos-assembler/blob/master/mantle/kola/README-kola-ext.md
+//! In particular, this relies on coreos-assembler's implementation of the Debian autopkgtest reboot API.
+//!
+//! The basic model of this test is:
+//!
+//! Copy the OS content into an archive repository, and generate a "synthetic"
+//! update for it by randomly mutating ELF files. Time how long that upgrade
+//! takes, to use as a baseline for the range of times at which we will
+//! interrupt it.
+//!
+//! Start a webserver, pointing rpm-ostree at the updated content. We
+//! alternate between a few "interrupt strategies", ranging from `kill -9`
+//! of rpm-ostreed, to rebooting normally, to an immediate forced reboot
+//! (with no filesystem sync).
+//!
+//! The state of the test is carried across reboots by serializing JSON
+//! into AUTOPKGTEST_REBOOT_MARK.
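+//!
+//! For illustration, a serialized mark might look roughly like this
+//! (the values here are hypothetical):
+//! `{"reboot-strategy":{"force":"kill9"},"iter":3,"before":1,"polite":{},"force":{"kill9":{"staged":2}}}`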
+
+use anyhow::{Context, Result};
+use commandspec::sh_execute;
+use rand::seq::SliceRandom;
+use rand::Rng;
+use serde::{Deserialize, Serialize};
+use std::collections::BTreeMap;
+use std::io::Write;
+use std::path::Path;
+use std::time;
+use strum::IntoEnumIterator;
+use strum_macros::EnumIter;
+
+use crate::rpmostree;
+use crate::test::*;
+
+const ORIGREF: &str = "orig-booted";
+const TESTREF: &str = "testcontent";
+const TDATAPATH: &str = "/var/tmp/ostree-test-transaction-data.json";
+const SRVREPO: &str = "/var/tmp/ostree-test-srv";
+/// Percentage of ELF files to change per update
+const TREEGEN_PERCENTAGE: u32 = 15;
+/// Total number of reboots
+const ITERATIONS: u32 = 10;
+/// Try at most this number of times per iteration to interrupt
+const ITERATION_RETRIES: u32 = 15;
+/// We mostly want to test forced interrupts, since those are
+/// the most likely to break.
+const FORCE_INTERRUPT_PERCENTAGE: u32 = 85;
+/// Multiply the average cycle time by this to ensure we sometimes
+/// fail to interrupt too.
+const FORCE_REBOOT_AFTER_MUL: f64 = 1.1f64;
+/// Amount of time in seconds we will delay each web request.
+/// FIXME: this should be a function of total number of objects or so
+const WEBSERVER_DELAY_SECS: f64 = 0.005;
+
+/// We choose between these at random
+#[derive(EnumIter, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "kebab-case")]
+enum PoliteInterruptStrategy {
+ None,
+ Stop,
+ Reboot,
+}
+
+/// We choose between these at random
+#[derive(EnumIter, Debug, PartialEq, Eq, Clone, PartialOrd, Ord, Serialize, Deserialize)]
+#[serde(rename_all = "kebab-case")]
+enum ForceInterruptStrategy {
+ Kill9,
+ Reboot,
+}
+
+#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord, Serialize, Deserialize)]
+#[serde(rename_all = "kebab-case")]
+enum InterruptStrategy {
+ Polite(PoliteInterruptStrategy),
+ Force(ForceInterruptStrategy),
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
+#[serde(rename_all = "kebab-case")]
+enum UpdateResult {
+ NotCompleted,
+ Staged,
+ Completed,
+}
+
+/// The data passed across reboots by serializing
+/// into the AUTOPKGTEST_REBOOT_MARK
+#[derive(Serialize, Deserialize, Debug, Default)]
+#[serde(rename_all = "kebab-case")]
+struct RebootMark {
+    /// The interrupt strategy that was used for the most recent reboot
+ reboot_strategy: Option<InterruptStrategy>,
+ /// Counts attempts to interrupt an upgrade
+ iter: u32,
+ /// Counts times upgrade completed before we tried to interrupt
+ before: u32,
+ /// Results for "polite" interrupt attempts
+ polite: BTreeMap<PoliteInterruptStrategy, BTreeMap<UpdateResult, u32>>,
+ /// Results for "forced" interrupt attempts
+ force: BTreeMap<ForceInterruptStrategy, BTreeMap<UpdateResult, u32>>,
+}
+
+impl RebootMark {
+ fn get_results_map(
+ &mut self,
+ strategy: &InterruptStrategy,
+ ) -> &mut BTreeMap<UpdateResult, u32> {
+ match strategy {
+            InterruptStrategy::Polite(t) => self.polite.entry(t.clone()).or_default(),
+            InterruptStrategy::Force(t) => self.force.entry(t.clone()).or_default(),
+ }
+ }
+}
+
+impl InterruptStrategy {
+    pub(crate) fn is_noop(&self) -> bool {
+        matches!(self, InterruptStrategy::Polite(PoliteInterruptStrategy::None))
+    }
+}
+
+/// TODO add readonly sysroot handling into base ostree
+fn testinit() -> Result<()> {
+ assert!(std::path::Path::new("/run/ostree-booted").exists());
+ sh_execute!(
+ r"if ! test -w /sysroot; then
+ mount -o remount,rw /sysroot
+fi"
+ )?;
+ Ok(())
+}
+
+/// Given a booted ostree, generate a modified version and write it
+/// into our srvrepo. This is fairly hacky; it'd be better if we
+/// reworked the tree mutation to operate on an ostree repo
+/// rather than a filesystem.
+fn generate_update(commit: &str) -> Result<()> {
+ println!("Generating update from {}", commit);
+ crate::treegen::update_os_tree(SRVREPO, TESTREF, TREEGEN_PERCENTAGE)
+ .context("Failed to generate new content")?;
+ // Amortize the prune across multiple runs; we don't want to leak space,
+ // but traversing all the objects is expensive. So here we only prune 1/5 of the time.
+ if rand::thread_rng().gen_ratio(1, 5) {
+ sh_execute!(
+ "ostree --repo={srvrepo} prune --refs-only --depth=1",
+ srvrepo = SRVREPO
+ )?;
+ }
+ Ok(())
+}
+
+/// Create an archive repository of current OS content. This is a bit expensive;
+/// in the future we should try a trick using the `parent` property on this repo,
+/// and then teach our webserver to redirect to the system for objects it doesn't
+/// have.
+fn generate_srv_repo(commit: &str) -> Result<()> {
+ sh_execute!(
+ r#"
+ ostree --repo={srvrepo} init --mode=archive
+ ostree --repo={srvrepo} config set archive.zlib-level 1
+ ostree --repo={srvrepo} pull-local /sysroot/ostree/repo {commit}
+ ostree --repo={srvrepo} refs --create={testref} {commit}
+ "#,
+ srvrepo = SRVREPO,
+ commit = commit,
+ testref = TESTREF
+ )
+ .context("Failed to generate srv repo")?;
+ generate_update(commit)?;
+ Ok(())
+}
+
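+/// Baseline timing data, persisted as JSON at `TDATAPATH` so that it
+/// survives across reboots.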
+#[derive(Serialize, Deserialize, Debug)]
+struct TransactionalTestInfo {
+ cycle_time: time::Duration,
+}
+
+#[derive(Serialize, Deserialize, Debug, Default)]
+struct Kill9Stats {
+ interrupted: u32,
+ staged: u32,
+ success: u32,
+}
+
+#[derive(Serialize, Deserialize, Debug, Default)]
+struct RebootStats {
+ interrupted: u32,
+ success: u32,
+}
+
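+/// Run an upgrade to completion: `rpm-ostree upgrade` stages the new
+/// deployment, and the finalization that would normally happen at shutdown
+/// (via ostree-finalize-staged) is forced to happen immediately by starting
+/// and then stopping the unit.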
+fn upgrade_and_finalize() -> Result<()> {
+ sh_execute!(
+ "rpm-ostree upgrade
+ systemctl start ostree-finalize-staged
+ systemctl stop ostree-finalize-staged"
+ )
+ .context("Upgrade and finalize failed")?;
+ Ok(())
+}
+
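+/// Run `upgrade_and_finalize()` as a blocking task, racing it against the
+/// given timeout; returns `true` if the upgrade completed before the timeout.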
+async fn run_upgrade_or_timeout(timeout: time::Duration) -> Result<bool> {
+ let upgrade = tokio::task::spawn_blocking(upgrade_and_finalize);
+ Ok(tokio::select! {
+ res = upgrade => {
+ let _res = res?;
+ true
+ },
+ _ = tokio::time::delay_for(timeout) => {
+ false
+ }
+ })
+}
+
+/// The set of commits that we should see
+#[derive(Debug)]
+struct CommitStates {
+ booted: String,
+ orig: String,
+ prev: String,
+ target: String,
+}
+
+impl CommitStates {
+ pub(crate) fn describe(&self, commit: &str) -> Option<&'static str> {
+ if commit == self.booted {
+ Some("booted")
+ } else if commit == self.orig {
+ Some("orig")
+ } else if commit == self.prev {
+ Some("prev")
+ } else if commit == self.target {
+ Some("target")
+ } else {
+ None
+ }
+ }
+}
+
+/// In the case where we've entered via a reboot, this function
+/// checks the booted deployment against the expected commits, and
+/// generates a new update if the previous upgrade completed successfully.
+fn parse_and_validate_reboot_mark<M: AsRef<str>>(
+ commitstates: &mut CommitStates,
+ mark: M,
+) -> Result<RebootMark> {
+ let markstr = mark.as_ref();
+ let mut mark: RebootMark = serde_json::from_str(markstr)
+ .with_context(|| format!("Failed to parse reboot mark {:?}", markstr))?;
+ // The first failed reboot may be into the original booted commit
+ let status = rpmostree::query_status()?;
+ let firstdeploy = &status.deployments[0];
+ // The first deployment should not be staged
+ assert!(!firstdeploy.staged.unwrap_or(false));
+ assert!(firstdeploy.booted);
+ assert_eq!(firstdeploy.checksum, commitstates.booted);
+ let reboot_type = if let Some(t) = mark.reboot_strategy.as_ref() {
+ t.clone()
+ } else {
+ anyhow::bail!("No reboot strategy in mark");
+ };
+ if commitstates.booted == commitstates.target {
+ mark.get_results_map(&reboot_type)
+ .entry(UpdateResult::Completed)
+ .and_modify(|result_e| {
+ *result_e += 1;
+ })
+ .or_insert(1);
+ println!("Successfully updated to {}", commitstates.target);
+ // Since we successfully updated, generate a new commit to target
+ generate_update(&firstdeploy.checksum)?;
+ // Update the target state
+ let srvrepo_obj = ostree::Repo::new(&gio::File::new_for_path(SRVREPO));
+ srvrepo_obj.open(gio::NONE_CANCELLABLE)?;
+ commitstates.target = srvrepo_obj.resolve_rev(TESTREF, false)?.into();
+ } else if commitstates.booted == commitstates.orig || commitstates.booted == commitstates.prev {
+ println!(
+ "Failed update to {} (booted={})",
+ commitstates.target, commitstates.booted
+ );
+ mark.get_results_map(&reboot_type)
+ .entry(UpdateResult::NotCompleted)
+ .and_modify(|result_e| {
+ *result_e += 1;
+ })
+ .or_insert(1);
+ } else {
+ anyhow::bail!("Unexpected target commit: {}", firstdeploy.checksum);
+ };
+    // Clear the strategy; a new one is chosen (and recorded) before the next reboot
+ mark.reboot_strategy = None;
+ Ok(mark)
+}
+
+fn validate_pending_commit(pending_commit: &str, commitstates: &CommitStates) -> Result<()> {
+ if pending_commit != commitstates.target {
+ sh_execute!("rpm-ostree status -v")?;
+ sh_execute!(
+ "ostree show {pending_commit}",
+ pending_commit = pending_commit
+ )?;
+ anyhow::bail!(
+ "Expected target commit={} but pending={} ({:?})",
+ commitstates.target,
+ pending_commit,
+ commitstates.describe(pending_commit)
+ );
+ }
+ Ok(())
+}
+
+/// After a non-rebooting interrupt (e.g. `kill -9` of rpm-ostreed), check the state
+fn validate_live_interrupted_upgrade(commitstates: &CommitStates) -> Result<UpdateResult> {
+ let status = rpmostree::query_status()?;
+ let firstdeploy = &status.deployments[0];
+ let pending_commit = firstdeploy.checksum.as_str();
+ let res = if firstdeploy.staged.unwrap_or(false) {
+ assert!(!firstdeploy.booted);
+ validate_pending_commit(pending_commit, &commitstates)?;
+ UpdateResult::Staged
+    } else if pending_commit == commitstates.booted {
+        UpdateResult::NotCompleted
+    } else if pending_commit == commitstates.target {
+        UpdateResult::Completed
+    } else {
+        anyhow::bail!(
+            "Unexpected pending commit: {} ({:?})",
+            pending_commit,
+            commitstates.describe(pending_commit)
+        );
+    };
+ Ok(res)
+}
+
+fn impl_transaction_test<M: AsRef<str>>(
+ booted_commit: &str,
+ tdata: &TransactionalTestInfo,
+ mark: Option<M>,
+) -> Result<()> {
+ let polite_strategies = PoliteInterruptStrategy::iter().collect::<Vec<_>>();
+ let force_strategies = ForceInterruptStrategy::iter().collect::<Vec<_>>();
+
+ // Gather the expected possible commits
+ let mut commitstates = {
+ let srvrepo_obj = ostree::Repo::new(&gio::File::new_for_path(SRVREPO));
+ srvrepo_obj.open(gio::NONE_CANCELLABLE)?;
+ let sysrepo_obj = ostree::Repo::new(&gio::File::new_for_path("/sysroot/ostree/repo"));
+ sysrepo_obj.open(gio::NONE_CANCELLABLE)?;
+
+ CommitStates {
+ booted: booted_commit.to_string(),
+ orig: sysrepo_obj.resolve_rev(ORIGREF, false)?.into(),
+ prev: srvrepo_obj
+ .resolve_rev(&format!("{}^", TESTREF), false)?
+ .into(),
+ target: srvrepo_obj.resolve_rev(TESTREF, false)?.into(),
+ }
+ };
+
+ let mut mark = if let Some(mark) = mark {
+ let markstr = mark.as_ref();
+ // In the successful case, this generates a new target commit,
+ // so we pass via &mut.
+ parse_and_validate_reboot_mark(&mut commitstates, markstr)
+ .context("Failed to parse reboot mark")?
+ } else {
+        RebootMark::default()
+ };
+ // Drop the &mut
+ let commitstates = commitstates;
+
+ assert_ne!(commitstates.booted.as_str(), commitstates.target.as_str());
+
+ let mut rt = tokio::runtime::Runtime::new()?;
+ let cycle_time_ms = (tdata.cycle_time.as_secs_f64() * 1000f64 * FORCE_REBOOT_AFTER_MUL) as u64;
+ // Set when we're trying an interrupt strategy that isn't a reboot, so we will
+ // re-enter the loop below.
+ let mut live_strategy: Option<InterruptStrategy> = None;
+ let mut retries = 0;
+ // This loop is for the non-rebooting strategies - we might use kill -9
+ // or not interrupt at all. But if we choose a reboot strategy
+ // then we'll exit implicitly via the reboot, and reenter the function
+ // above.
+ loop {
+ // Save the previous strategy as a string so we can use it in error
+ // messages below
+ let prev_strategy_str = format!("{:?}", live_strategy);
+ // Process the results of the previous run if any, and reset
+ // live_strategy to None
+ if let Some(last_strategy) = live_strategy.take() {
+ mark.iter += 1;
+ retries = 0;
+ let res = validate_live_interrupted_upgrade(&commitstates)?;
+ if last_strategy.is_noop() {
+ assert_eq!(res, UpdateResult::Completed)
+ }
+ mark.get_results_map(&last_strategy)
+ .entry(res)
+ .and_modify(|result_e| {
+ *result_e += 1;
+ })
+ .or_insert(1);
+ }
+ // If we've reached our target iterations, exit the test successfully
+ if mark.iter == ITERATIONS {
+ // TODO also add ostree admin fsck to check the deployment directories
+ sh_execute!(
+ "echo Performing final validation...
+ ostree fsck"
+ )?;
+ return Ok(());
+ }
+        let mut rng = rand::thread_rng();
+        // Pick a strategy for this attempt; re-use the same thread-local RNG
+        let strategy: InterruptStrategy = if rng.gen_ratio(FORCE_INTERRUPT_PERCENTAGE, 100) {
+            InterruptStrategy::Force(force_strategies.choose(&mut rng).expect("strategy").clone())
+        } else {
+            InterruptStrategy::Polite(
+                polite_strategies
+                    .choose(&mut rng)
+                    .expect("strategy")
+                    .clone(),
+            )
+        };
+ println!("Using interrupt strategy: {:?}", strategy);
+        // We usually interrupt before the upgrade would complete, but also
+        // a percentage of the time after. The no-op case is special: there
+        // we want to wait for the upgrade to complete.
+ let sleeptime = if strategy.is_noop() {
+            // In the no-op case, sleep for 20x the cycle time, capped at one day
+ let ms = std::cmp::min(cycle_time_ms.saturating_mul(20), 24 * 60 * 60 * 1000);
+ time::Duration::from_millis(ms)
+ } else {
+ time::Duration::from_millis(rng.gen_range(0, cycle_time_ms))
+ };
+ println!(
+ "force-reboot-time={:?} cycle={:?} status:{:?}",
+ sleeptime, tdata.cycle_time, &mark
+ );
+ // Reset the target ref to booted, and perform a cleanup
+ // to ensure we're re-downloading objects each time
+ sh_execute!(
+ "
+ systemctl stop rpm-ostreed
+ systemctl stop ostree-finalize-staged
+ ostree reset testrepo:{testref} {booted_commit}
+ rpm-ostree cleanup -pbrm
+ ",
+ testref = TESTREF,
+ booted_commit = booted_commit
+ )
+ .with_context(|| {
+ format!(
+ "Failed pre-upgrade cleanup (prev strategy: {})",
+ prev_strategy_str.as_str()
+ )
+ })?;
+
+ // The heart of the test - start an upgrade and wait a random amount
+ // of time to interrupt. If the result is true, then the upgrade completed
+ // successfully before the timeout.
+ let res: Result<bool> = rt.block_on(async move { run_upgrade_or_timeout(sleeptime).await });
+ let res = res.context("Failed during upgrade")?;
+ if res {
+ if !strategy.is_noop() {
+ println!(
+ "Failed to interrupt upgrade, attempt {}/{}",
+ retries, ITERATION_RETRIES
+ );
+ retries += 1;
+ mark.before += 1;
+ } else {
+ live_strategy = Some(strategy);
+ }
+ let status = rpmostree::query_status()?;
+ let firstdeploy = &status.deployments[0];
+ let pending_commit = firstdeploy.checksum.as_str();
+ validate_pending_commit(pending_commit, &commitstates)
+ .context("Failed to validate pending commit")?;
+ } else {
+ // Our timeout fired before the upgrade completed; execute
+ // the interrupt strategy.
+ match strategy {
+ InterruptStrategy::Force(ForceInterruptStrategy::Kill9) => {
+ sh_execute!(
+ "systemctl kill -s KILL rpm-ostreed || true
+ systemctl kill -s KILL ostree-finalize-staged || true"
+ )?;
+ live_strategy = Some(strategy);
+ }
+ InterruptStrategy::Force(ForceInterruptStrategy::Reboot) => {
+ mark.reboot_strategy = Some(strategy.clone());
+ prepare_reboot(serde_json::to_string(&mark)?)?;
+ // This is a forced reboot - no syncing of the filesystem.
+ sh_execute!("reboot -ff")?;
+ std::thread::sleep(time::Duration::from_secs(60));
+ // Shouldn't happen
+ anyhow::bail!("failed to reboot");
+ }
+ InterruptStrategy::Polite(PoliteInterruptStrategy::None) => {
+ anyhow::bail!("Failed to wait for uninterrupted upgrade");
+ }
+ InterruptStrategy::Polite(PoliteInterruptStrategy::Reboot) => {
+ mark.reboot_strategy = Some(strategy.clone());
+ Err(reboot(serde_json::to_string(&mark)?))?;
+ // We either rebooted, or failed to reboot
+ }
+ InterruptStrategy::Polite(PoliteInterruptStrategy::Stop) => {
+ sh_execute!(
+ "systemctl stop rpm-ostreed || true
+ systemctl stop ostree-finalize-staged || true"
+ )?;
+ live_strategy = Some(strategy);
+ }
+ }
+ }
+ }
+}
+
+#[itest(destructive = true)]
+fn transactionality() -> Result<()> {
+ testinit()?;
+ let mark = get_reboot_mark()?;
+ let cancellable = Some(gio::Cancellable::new());
+ let sysroot = ostree::Sysroot::new_default();
+ sysroot.load(cancellable.as_ref())?;
+ assert!(sysroot.is_booted());
+ let booted = sysroot.get_booted_deployment().expect("booted deployment");
+ let commit: String = booted.get_csum().expect("booted csum").into();
+ // We need this static across reboots
+ let srvrepo = Path::new(SRVREPO);
+ let firstrun = !srvrepo.exists();
+    if mark.is_some() {
+ if firstrun {
+ anyhow::bail!("Missing {:?}", srvrepo);
+ }
+ } else {
+ if !firstrun {
+ anyhow::bail!("Unexpected {:?}", srvrepo);
+ }
+ generate_srv_repo(&commit)?;
+ }
+
+ // Let's assume we're changing about 200 objects each time;
+ // that leads to probably 300 network requests, so we want
+ // a low average delay.
+ let webserver_opts = TestHttpServerOpts {
+ random_delay: Some(time::Duration::from_secs_f64(WEBSERVER_DELAY_SECS)),
+ ..Default::default()
+ };
+ with_webserver_in(&srvrepo, &webserver_opts, move |addr| {
+ let url = format!("http://{}", addr);
+ sh_execute!(
+ "ostree remote delete --if-exists testrepo
+ ostree remote add --set=gpg-verify=false testrepo {url}",
+ url = url
+ )?;
+
+ if firstrun {
+            // Also disable some services (like zincati), since we don't want
+            // automatic updates during our reboots (and it currently fails to
+            // start anyway). The less we run on each reboot, the faster reboots are.
+ sh_execute!("systemctl disable --now zincati fedora-coreos-pinger")?;
+ // And prepare for updates
+ sh_execute!("rpm-ostree cleanup -pr")?;
+ generate_update(&commit)?;
+ // Directly set the origin, so that we're not dependent on the pending deployment.
+ // FIXME: make this saner
+ sh_execute!(
+ "
+ ostree admin set-origin testrepo {url} {testref}
+ ostree refs --create testrepo:{testref} {commit}
+ ostree refs --create={origref} {commit}
+ ",
+ url = url,
+ origref = ORIGREF,
+ testref = TESTREF,
+ commit = commit
+ )?;
+            // We gather a single "cycle time" at the start as a way of gauging
+            // how long an upgrade should take, so we know when to interrupt. This
+            // obviously has some pitfalls; for example, if there are competing
+            // VMs when we start but not afterwards (or vice versa), we may almost
+            // always interrupt too early, or too late.
+ let start = time::Instant::now();
+ upgrade_and_finalize().context("Firstrun upgrade failed")?;
+ let end = time::Instant::now();
+ let cycle_time = end.duration_since(start);
+            let tdata = TransactionalTestInfo { cycle_time };
+ let mut f = std::io::BufWriter::new(std::fs::File::create(&TDATAPATH)?);
+ serde_json::to_writer(&mut f, &tdata)?;
+ f.flush()?;
+ sh_execute!("rpm-ostree status")?;
+ }
+
+ let tdata = {
+ let mut f = std::io::BufReader::new(std::fs::File::open(&TDATAPATH)?);
+ serde_json::from_reader(&mut f).context("Failed to parse test info JSON")?
+ };
+
+ impl_transaction_test(commit.as_str(), &tdata, mark.as_ref())?;
+
+ Ok(())
+ })?;
+ Ok(())
+}
--- /dev/null
+use anyhow::{Context, Result};
+use commandspec::sh_execute;
+use openat_ext::{FileExt, OpenatDirExt};
+use rand::Rng;
+use std::fs::File;
+use std::io::prelude::*;
+use std::os::unix::fs::FileExt as UnixFileExt;
+use std::path::Path;
+
+use crate::test::*;
+
+/// Each time this is invoked it changes file contents
+/// in the target root, in a predictable way.
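+/// (It bumps a version stamp in `etc/.mkrootversion` and regenerates a few
+/// files derived from that version.)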
+pub(crate) fn mkroot<P: AsRef<Path>>(p: P) -> Result<()> {
+ let p = p.as_ref();
+ let verpath = p.join("etc/.mkrootversion");
+ let v: u32 = if verpath.exists() {
+ let s = std::fs::read_to_string(&verpath)?;
+ let v: u32 = s.trim_end().parse()?;
+ v + 1
+ } else {
+ 0
+ };
+ mkvroot(p, v)
+}
+
+/// Like `mkroot`, but takes an explicit version.
+pub(crate) fn mkvroot<P: AsRef<Path>>(p: P, v: u32) -> Result<()> {
+ let p = p.as_ref();
+ for v in &["usr/bin", "etc"] {
+ std::fs::create_dir_all(p.join(v))?;
+ }
+ let verpath = p.join("etc/.mkrootversion");
+ write_file(&verpath, &format!("{}", v))?;
+ write_file(p.join("usr/bin/somebinary"), &format!("somebinary v{}", v))?;
+ write_file(p.join("etc/someconf"), &format!("someconf v{}", v))?;
+ write_file(p.join("usr/bin/vmod2"), &format!("somebinary v{}", v % 2))?;
+ write_file(p.join("usr/bin/vmod3"), &format!("somebinary v{}", v % 3))?;
+ Ok(())
+}
+
+/// Returns `true` if a file is ELF; see https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
+pub(crate) fn is_elf(f: &mut File) -> Result<bool> {
+    // The ELF magic number is 0x7F followed by "ELF" (4 bytes)
+    let mut buf = [0; 4];
+ let n = f.read_at(&mut buf, 0)?;
+ if n < buf.len() {
+ anyhow::bail!("Failed to read expected {} bytes", buf.len());
+ }
+ Ok(buf[0] == 0x7F && &buf[1..4] == b"ELF")
+}
+
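+/// Copy one ELF file to `dest`, appending a bit of random data so that its
+/// content (and hence its ostree object) changes while the binary remains
+/// executable.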
+pub(crate) fn mutate_one_executable_to(
+ f: &mut File,
+ name: &std::ffi::OsStr,
+ dest: &openat::Dir,
+) -> Result<()> {
+ let mut destf = dest
+ .write_file(name, 0o755)
+ .context("Failed to open for write")?;
+ f.copy_to(&destf).context("Failed to copy")?;
+ // ELF is OK with us just appending some junk
+ let extra = rand::thread_rng()
+ .sample_iter(&rand::distributions::Alphanumeric)
+ .take(10)
+ .collect::<String>();
+ destf
+ .write_all(extra.as_bytes())
+ .context("Failed to append extra data")?;
+ Ok(())
+}
+
+/// Find ELF files in `src` and write mutated copies of (approximately)
+/// `percentage` percent of them to `dest`; returns the number mutated.
+pub(crate) fn mutate_executables_to(
+ src: &openat::Dir,
+ dest: &openat::Dir,
+ percentage: u32,
+) -> Result<u32> {
+ use nix::sys::stat::Mode as NixMode;
+ assert!(percentage > 0 && percentage <= 100);
+ let mut mutated = 0;
+ for entry in src.list_dir(".")? {
+ let entry = entry?;
+ if src.get_file_type(&entry)? != openat::SimpleType::File {
+ continue;
+ }
+ let meta = src.metadata(entry.file_name())?;
+ let st = meta.stat();
+ let mode = NixMode::from_bits_truncate(st.st_mode);
+ // Must be executable
+ if !mode.intersects(NixMode::S_IXUSR | NixMode::S_IXGRP | NixMode::S_IXOTH) {
+ continue;
+ }
+ // Not suid
+ if mode.intersects(NixMode::S_ISUID | NixMode::S_ISGID) {
+ continue;
+ }
+ // Greater than 1k in size
+ if st.st_size < 1024 {
+ continue;
+ }
+ let mut f = src.open_file(entry.file_name())?;
+ if !is_elf(&mut f)? {
+ continue;
+ }
+ if !rand::thread_rng().gen_ratio(percentage, 100) {
+ continue;
+ }
+ mutate_one_executable_to(&mut f, entry.file_name(), dest)
+ .with_context(|| format!("Failed updating {:?}", entry.file_name()))?;
+ mutated += 1;
+ }
+ Ok(mutated)
+}
+
+/// Given an ostree ref, use the running root filesystem as a source and
+/// update `percentage` percent of its binary (ELF) files.
+pub(crate) fn update_os_tree<P: AsRef<Path>>(
+ repo_path: P,
+ ostref: &str,
+ percentage: u32,
+) -> Result<()> {
+ assert!(percentage > 0 && percentage <= 100);
+ let repo_path = repo_path.as_ref();
+ let tempdir = tempfile::tempdir_in(repo_path.join("tmp"))?;
+ let mut mutated = 0;
+ {
+ let tempdir = openat::Dir::open(tempdir.path())?;
+ let binary_dirs = &["usr/bin", "usr/sbin", "usr/lib", "usr/lib64"];
+ let rootfs = openat::Dir::open("/")?;
+ for v in binary_dirs {
+ let v = *v;
+ if let Some(src) = rootfs.sub_dir_optional(v)? {
+ tempdir.ensure_dir("usr", 0o755)?;
+ tempdir.ensure_dir(v, 0o755)?;
+ let dest = tempdir.sub_dir(v)?;
+ mutated += mutate_executables_to(&src, &dest, percentage)
+ .with_context(|| format!("Replacing binaries in {}", v))?;
+ }
+ }
+ }
+ assert!(mutated > 0);
+ println!("Mutated ELF files: {}", mutated);
+ sh_execute!("ostree --repo={repo} commit --consume -b {ostref} --base={ostref} --tree=dir={tempdir} --owner-uid 0 --owner-gid 0 --selinux-policy-from-base --link-checkout-speedup --no-bindings --no-xattrs",
+ repo = repo_path.to_str().unwrap(),
+ ostref = ostref,
+ tempdir = tempdir.path().to_str().unwrap()).context("Failed to commit updated content")?;
+ Ok(())
+}