From: Alex Crichton Date: Wed, 6 Dec 2017 22:53:09 +0000 (-0800) Subject: Avoid rebuilding a project when cwd changes X-Git-Tag: archive/raspbian/0.35.0-2+rpi1~3^2^2^2^2^2^2^2~22^2~4^2~25^2~1 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=8647a87d29fdd046592faed5f870aa7b2141cc24;p=cargo.git Avoid rebuilding a project when cwd changes This commit is targeted at solving a use case which typically comes up during CI builds -- the `target` directory is cached between builds but the cwd of the build changes over time. For example the following scenario can happen: 1. A project is compiled at `/projects/a`. 2. The `target` directory is cached. 3. A new build is started in `/projects/b`. 4. The previous `target` directory is restored to `/projects/b`. 5. The build start, and Cargo rebuilds everything. The last piece of behavior is indeed unfortunate! Cargo's internal hashing currently isn't that resilient to changing cwd and this PR aims to help improve the situation! The first point of too-much-hashing came up with `Target::src_path`. Each `Target` was hashed and stored for all compilations, and the `src_path` field was an absolute path on the filesystem to the file that needed to be compiled. This path then changed over time when cwd changed, but otherwise everything else remained the same! This commit updates the handling of the `src_path` field to simply ignore it when hashing. Instead the path we actually pass to rustc is later calculated and then passed to the fingerprint calculation. The next problem this fixes is that the dep info files were augmented after creation to have the cwd of the compiler at the time to find the files at a later date. This, unfortunately, would cause issues if the cwd itself changed. Instead the cwd is now left out of dep-info files (they're no longer augmented) and instead the cwd is recalculated when parsing the dep info later. The final problem that this commit fixes is actually an existing issue in Cargo today. Right now you can actually execute `cargo build` from anywhere in a project and Cargo will execute the build. Unfortunately though the argument to rustc was actually different depending on what directory you were in (the compiler was invoked with a path relative to cwd). This path ends up being used for metadata like debuginfo which means that different directories would cause different artifacts to be created, but Cargo wouldn't rerun the compiler! To fix this issue the matter of cwd is now entirely excluded from compilation command lines. Instead rustc is unconditionally invoked with a relative path *if* the path is underneath the workspace root, and otherwise it's invoked as an absolute path (in which case the cwd doesn't matter). Once all these fixes were added up it means that now we can have projects where if you move the entire directory Cargo won't rebuild the original source! Note that this may be a bit of a breaking change, however. This means that the paths in error messages for cargo will no longer be unconditionally relative to the current working directory, but rather relative to the root of the workspace itself. Unfortunately this is moreso of a feature right now rather than a bug, so it may be one that we just have to stomach. --- diff --git a/src/cargo/core/manifest.rs b/src/cargo/core/manifest.rs index 0ebd0a39d..51ed364ee 100644 --- a/src/cargo/core/manifest.rs +++ b/src/cargo/core/manifest.rs @@ -2,6 +2,7 @@ use std::collections::{HashMap, BTreeMap}; use std::fmt; use std::path::{PathBuf, Path}; use std::rc::Rc; +use std::hash::{Hash, Hasher}; use semver::Version; use serde::ser; @@ -199,7 +200,11 @@ pub struct Profiles { pub struct Target { kind: TargetKind, name: String, - src_path: PathBuf, + // Note that the `src_path` here is excluded from the `Hash` implementation + // as it's absolute currently and is otherwise a little too brittle for + // causing rebuilds. Instead the hash for the path that we send to the + // compiler is handled elsewhere. + src_path: NonHashedPathBuf, required_features: Option>, tested: bool, benched: bool, @@ -209,6 +214,17 @@ pub struct Target { for_host: bool, } +#[derive(Clone, PartialEq, Eq, Debug)] +struct NonHashedPathBuf { + path: PathBuf, +} + +impl Hash for NonHashedPathBuf { + fn hash(&self, _: &mut H) { + // ... + } +} + #[derive(Serialize)] struct SerializedTarget<'a> { /// Is this a `--bin bin`, `--lib`, `--example ex`? @@ -227,7 +243,7 @@ impl ser::Serialize for Target { kind: &self.kind, crate_types: self.rustc_crate_types(), name: &self.name, - src_path: &self.src_path, + src_path: &self.src_path.path, }.serialize(s) } } @@ -370,7 +386,7 @@ impl Target { Target { kind: TargetKind::Bin, name: String::new(), - src_path: src_path, + src_path: NonHashedPathBuf { path: src_path }, required_features: None, doc: false, doctest: false, @@ -459,7 +475,7 @@ impl Target { pub fn name(&self) -> &str { &self.name } pub fn crate_name(&self) -> String { self.name.replace("-", "_") } - pub fn src_path(&self) -> &Path { &self.src_path } + pub fn src_path(&self) -> &Path { &self.src_path.path } pub fn required_features(&self) -> Option<&Vec> { self.required_features.as_ref() } pub fn kind(&self) -> &TargetKind { &self.kind } pub fn tested(&self) -> bool { self.tested } diff --git a/src/cargo/ops/cargo_rustc/fingerprint.rs b/src/cargo/ops/cargo_rustc/fingerprint.rs index 62a53a857..ae6e051cc 100644 --- a/src/cargo/ops/cargo_rustc/fingerprint.rs +++ b/src/cargo/ops/cargo_rustc/fingerprint.rs @@ -1,8 +1,8 @@ use std::env; -use std::fs::{self, File, OpenOptions}; +use std::fs::{self, File}; use std::hash::{self, Hasher}; use std::io::prelude::*; -use std::io::{BufReader, SeekFrom}; +use std::io::BufReader; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; @@ -99,8 +99,9 @@ pub fn prepare_target<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, } let allow_failure = unit.profile.rustc_args.is_some(); + let target_root = cx.target_root().to_path_buf(); let write_fingerprint = Work::new(move |_| { - match fingerprint.update_local() { + match fingerprint.update_local(&target_root) { Ok(()) => {} Err(..) if allow_failure => return Ok(()), Err(e) => return Err(e) @@ -139,6 +140,7 @@ pub struct Fingerprint { features: String, target: u64, profile: u64, + path: u64, #[serde(serialize_with = "serialize_deps", deserialize_with = "deserialize_deps")] deps: Vec<(String, Arc)>, local: Vec, @@ -165,6 +167,7 @@ fn deserialize_deps<'de, D>(d: D) -> Result)>, D:: rustc: 0, target: 0, profile: 0, + path: 0, local: vec![LocalFingerprint::Precalculated(String::new())], features: String::new(), deps: Vec::new(), @@ -181,15 +184,27 @@ enum LocalFingerprint { EnvBased(String, Option), } +impl LocalFingerprint { + fn mtime(root: &Path, mtime: Option, path: &Path) + -> LocalFingerprint + { + let mtime = MtimeSlot(Mutex::new(mtime)); + assert!(path.is_absolute()); + let path = path.strip_prefix(root).unwrap_or(path); + LocalFingerprint::MtimeBased(mtime, path.to_path_buf()) + } +} + struct MtimeSlot(Mutex>); impl Fingerprint { - fn update_local(&self) -> CargoResult<()> { + fn update_local(&self, root: &Path) -> CargoResult<()> { let mut hash_busted = false; for local in self.local.iter() { match *local { LocalFingerprint::MtimeBased(ref slot, ref path) => { - let meta = fs::metadata(path) + let path = root.join(path); + let meta = fs::metadata(&path) .chain_err(|| { internal(format!("failed to stat `{}`", path.display())) })?; @@ -227,11 +242,14 @@ impl Fingerprint { if self.target != old.target { bail!("target configuration has changed") } + if self.path != old.path { + bail!("path to the compiler has changed") + } if self.profile != old.profile { bail!("profile configuration has changed") } if self.rustflags != old.rustflags { - return Err(internal("RUSTFLAGS has changed")) + bail!("RUSTFLAGS has changed") } if self.local.len() != old.local.len() { bail!("local lens changed"); @@ -294,13 +312,14 @@ impl hash::Hash for Fingerprint { rustc, ref features, target, + path, profile, ref deps, ref local, memoized_hash: _, ref rustflags, } = *self; - (rustc, features, target, profile, local, rustflags).hash(h); + (rustc, features, target, path, profile, local, rustflags).hash(h); h.write_usize(deps.len()); for &(ref name, ref fingerprint) in deps { @@ -375,8 +394,8 @@ fn calculate<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, unit: &Unit<'a>) // And finally, calculate what our own local fingerprint is let local = if use_dep_info(unit) { let dep_info = dep_info_loc(cx, unit); - let mtime = dep_info_mtime_if_fresh(&dep_info)?; - LocalFingerprint::MtimeBased(MtimeSlot(Mutex::new(mtime)), dep_info) + let mtime = dep_info_mtime_if_fresh(unit.pkg, &dep_info)?; + LocalFingerprint::mtime(cx.target_root(), mtime, &dep_info) } else { let fingerprint = pkg_fingerprint(cx, unit.pkg)?; LocalFingerprint::Precalculated(fingerprint) @@ -392,6 +411,9 @@ fn calculate<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, unit: &Unit<'a>) rustc: util::hash_u64(&cx.config.rustc()?.verbose_version), target: util::hash_u64(&unit.target), profile: util::hash_u64(&unit.profile), + // Note that .0 is hashed here, not .1 which is the cwd. That doesn't + // actually affect the output artifact so there's no need to hash it. + path: util::hash_u64(&super::path_args(cx, unit).0), features: format!("{:?}", cx.resolve.features_sorted(unit.pkg.package_id())), deps: deps, local: vec![local], @@ -443,6 +465,7 @@ pub fn prepare_build_cmd<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, unit: &Unit<'a>) rustc: 0, target: 0, profile: 0, + path: 0, features: String::new(), deps: Vec::new(), local: local, @@ -464,7 +487,8 @@ pub fn prepare_build_cmd<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, unit: &Unit<'a>) // build script. let state = Arc::clone(&cx.build_state); let key = (unit.pkg.package_id().clone(), unit.kind); - let root = unit.pkg.root().to_path_buf(); + let pkg_root = unit.pkg.root().to_path_buf(); + let target_root = cx.target_root().to_path_buf(); let write_fingerprint = Work::new(move |_| { if let Some(output_path) = output_path { let outputs = state.outputs.lock().unwrap(); @@ -472,8 +496,8 @@ pub fn prepare_build_cmd<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, unit: &Unit<'a>) if !outputs.rerun_if_changed.is_empty() || !outputs.rerun_if_env_changed.is_empty() { let deps = BuildDeps::new(&output_path, Some(outputs)); - fingerprint.local = local_fingerprints_deps(&deps, &root); - fingerprint.update_local()?; + fingerprint.local = local_fingerprints_deps(&deps, &target_root, &pkg_root); + fingerprint.update_local(&target_root)?; } } write_fingerprint(&loc, &fingerprint) @@ -516,18 +540,19 @@ fn build_script_local_fingerprints<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, // Ok so now we're in "new mode" where we can have files listed as // dependencies as well as env vars listed as dependencies. Process them all // here. - Ok((local_fingerprints_deps(deps, unit.pkg.root()), Some(output))) + Ok((local_fingerprints_deps(deps, cx.target_root(), unit.pkg.root()), Some(output))) } -fn local_fingerprints_deps(deps: &BuildDeps, root: &Path) -> Vec { +fn local_fingerprints_deps(deps: &BuildDeps, target_root: &Path, pkg_root: &Path) + -> Vec +{ debug!("new local fingerprints deps"); let mut local = Vec::new(); if !deps.rerun_if_changed.is_empty() { let output = &deps.build_script_output; - let deps = deps.rerun_if_changed.iter().map(|p| root.join(p)); + let deps = deps.rerun_if_changed.iter().map(|p| pkg_root.join(p)); let mtime = mtime_if_fresh(output, deps); - let mtime = MtimeSlot(Mutex::new(mtime)); - local.push(LocalFingerprint::MtimeBased(mtime, output.clone())); + local.push(LocalFingerprint::mtime(target_root, mtime, output)); } for var in deps.rerun_if_env_changed.iter() { @@ -584,23 +609,19 @@ fn log_compare(unit: &Unit, compare: &CargoResult<()>) { }; info!("fingerprint error for {}: {}", unit.pkg, ce); - for cause in ce.iter() { + for cause in ce.iter().skip(1) { info!(" cause: {}", cause); } } // Parse the dep-info into a list of paths -pub fn parse_dep_info(dep_info: &Path) -> CargoResult>> { +pub fn parse_dep_info(cx: &Context, dep_info: &Path) + -> CargoResult>> +{ macro_rules! fs_try { ($e:expr) => (match $e { Ok(e) => e, Err(..) => return Ok(None) }) } - let mut f = BufReader::new(fs_try!(File::open(dep_info))); - // see comments in append_current_dir for where this cwd is manifested from. - let mut cwd = Vec::new(); - if fs_try!(f.read_until(0, &mut cwd)) == 0 { - return Ok(None) - } - let cwd = util::bytes2path(&cwd[..cwd.len()-1])?; + let f = BufReader::new(fs_try!(File::open(dep_info))); let line = match f.lines().next() { Some(Ok(line)) => line, _ => return Ok(None), @@ -622,13 +643,19 @@ pub fn parse_dep_info(dep_info: &Path) -> CargoResult>> { internal("malformed dep-info format, trailing \\".to_string()) })?); } - paths.push(cwd.join(&file)); + + // Note that paths emitted in dep info files may be relative, but due to + // `path_args` in the module above this the relative paths are always + // relative to the root of a workspace. + paths.push(cx.ws.root().join(&file)); } Ok(Some(paths)) } -fn dep_info_mtime_if_fresh(dep_info: &Path) -> CargoResult> { - if let Some(paths) = parse_dep_info(dep_info)? { +fn dep_info_mtime_if_fresh(cx: &Context, dep_info: &Path) + -> CargoResult> +{ + if let Some(paths) = parse_dep_info(cx, dep_info)? { Ok(mtime_if_fresh(dep_info, paths.iter())) } else { Ok(None) @@ -703,20 +730,3 @@ fn filename<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, unit: &Unit<'a>) -> String { }; format!("{}{}-{}", flavor, kind, file_stem) } - -// The dep-info files emitted by the compiler all have their listed paths -// relative to whatever the current directory was at the time that the compiler -// was invoked. As the current directory may change over time, we need to record -// what that directory was at the beginning of the file so we can know about it -// next time. -pub fn append_current_dir(path: &Path, cwd: &Path) -> CargoResult<()> { - debug!("appending {} <- {}", path.display(), cwd.display()); - let mut f = OpenOptions::new().read(true).write(true).open(path)?; - let mut contents = Vec::new(); - f.read_to_end(&mut contents)?; - f.seek(SeekFrom::Start(0))?; - f.write_all(util::path2bytes(cwd)?)?; - f.write_all(&[0])?; - f.write_all(&contents)?; - Ok(()) -} diff --git a/src/cargo/ops/cargo_rustc/mod.rs b/src/cargo/ops/cargo_rustc/mod.rs index bc0c812ca..fc96ddb5a 100644 --- a/src/cargo/ops/cargo_rustc/mod.rs +++ b/src/cargo/ops/cargo_rustc/mod.rs @@ -346,7 +346,6 @@ fn rustc<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, root.join(&cx.file_stem(unit)) }.with_extension("d"); let dep_info_loc = fingerprint::dep_info_loc(cx, unit); - let cwd = cx.config.cwd().to_path_buf(); rustc.args(&cx.incremental_args(unit)?); rustc.args(&cx.rustflags_args(unit)?); @@ -444,7 +443,6 @@ fn rustc<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, internal(format!("could not rename dep info: {:?}", rustc_dep_info_loc)) })?; - fingerprint::append_current_dir(&dep_info_loc, &cwd)?; } Ok(()) @@ -653,9 +651,8 @@ fn rustdoc<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, unit: &Unit<'a>) -> CargoResult { let mut rustdoc = cx.compilation.rustdoc_process(unit.pkg)?; rustdoc.inherit_jobserver(&cx.jobserver); - rustdoc.arg("--crate-name").arg(&unit.target.crate_name()) - .cwd(cx.config.cwd()) - .arg(&root_path(cx, unit)); + rustdoc.arg("--crate-name").arg(&unit.target.crate_name()); + add_path_args(cx, unit, &mut rustdoc); if unit.kind != Kind::Host { if let Some(target) = cx.requested_target() { @@ -703,23 +700,34 @@ fn rustdoc<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, } // The path that we pass to rustc is actually fairly important because it will -// show up in error messages and the like. For this reason we take a few moments -// to ensure that something shows up pretty reasonably. +// show up in error messages (important for readability), debug information +// (important for caching), etc. As a result we need to be pretty careful how we +// actually invoke rustc. // -// The heuristic here is fairly simple, but the key idea is that the path is -// always "relative" to the current directory in order to be found easily. The -// path is only actually relative if the current directory is an ancestor if it. -// This means that non-path dependencies (git/registry) will likely be shown as -// absolute paths instead of relative paths. -fn root_path(cx: &Context, unit: &Unit) -> PathBuf { - let absolute = unit.pkg.root().join(unit.target.src_path()); - let cwd = cx.config.cwd(); - if absolute.starts_with(cwd) { - util::without_prefix(&absolute, cwd).map(|s| { - s.to_path_buf() - }).unwrap_or(absolute) - } else { - absolute +// In general users don't expect `cargo build` to cause rebuilds if you change +// directories. That could be if you just change directories in the project or +// if you literally move the whole project wholesale to a new directory. As a +// result we mostly don't factor in `cwd` to this calculation. Instead we try to +// track the workspace as much as possible and we update the current directory +// of rustc/rustdoc where approrpriate. +// +// The first returned value here is the argument to pass to rustc, and the +// second is the cwd that rustc should operate in. +fn path_args(cx: &Context, unit: &Unit) -> (PathBuf, Option) { + let ws_root = cx.ws.root(); + let src = unit.target.src_path(); + assert!(src.is_absolute()); + match src.strip_prefix(ws_root) { + Ok(path) => (path.to_path_buf(), Some(ws_root.to_path_buf())), + Err(_) => (src.to_path_buf(), None), + } +} + +fn add_path_args(cx: &Context, unit: &Unit, cmd: &mut ProcessBuilder) { + let (arg, cwd) = path_args(cx, unit); + cmd.arg(arg); + if let Some(cwd) = cwd { + cmd.cwd(cwd); } } @@ -734,12 +742,9 @@ fn build_base_args<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, } = *unit.profile; assert!(!run_custom_build); - // Move to cwd so the root_path() passed below is actually correct - cmd.cwd(cx.config.cwd()); - cmd.arg("--crate-name").arg(&unit.target.crate_name()); - cmd.arg(&root_path(cx, unit)); + add_path_args(cx, unit, cmd); match cx.config.shell().color_choice() { ColorChoice::Always => { cmd.arg("--color").arg("always"); } diff --git a/src/cargo/ops/cargo_rustc/output_depinfo.rs b/src/cargo/ops/cargo_rustc/output_depinfo.rs index b07b299f0..94b460463 100644 --- a/src/cargo/ops/cargo_rustc/output_depinfo.rs +++ b/src/cargo/ops/cargo_rustc/output_depinfo.rs @@ -36,7 +36,7 @@ fn add_deps_for_unit<'a, 'b>( if !unit.profile.run_custom_build { // Add dependencies from rustc dep-info output (stored in fingerprint directory) let dep_info_loc = fingerprint::dep_info_loc(context, unit); - if let Some(paths) = fingerprint::parse_dep_info(&dep_info_loc)? { + if let Some(paths) = fingerprint::parse_dep_info(context, &dep_info_loc)? { for path in paths { deps.insert(path); } diff --git a/tests/build.rs b/tests/build.rs index 523e42d91..8154caaf8 100644 --- a/tests/build.rs +++ b/tests/build.rs @@ -3856,9 +3856,11 @@ fn building_a_dependent_crate_witout_bin_should_fail() { )); } -#[cfg(any(target_os = "macos", target_os = "ios"))] #[test] fn uplift_dsym_of_bin_on_mac() { + if !cfg!(any(target_os = "macos", target_os = "ios")) { + return + } let p = project("foo") .file("Cargo.toml", r#" [project] diff --git a/tests/freshness.rs b/tests/freshness.rs index df4de3864..ff127a4f5 100644 --- a/tests/freshness.rs +++ b/tests/freshness.rs @@ -733,3 +733,38 @@ fn rebuild_if_environment_changes() { [RUNNING] `target[/]debug[/]env_change[EXE]` ", dir = p.url()))); } + +#[test] +fn no_rebuild_when_rename_dir() { + let p = project("foo") + .file("Cargo.toml", r#" + [package] + name = "bar" + version = "0.0.1" + authors = [] + + [dependencies] + foo = { path = "foo" } + "#) + .file("src/lib.rs", "") + .file("foo/Cargo.toml", r#" + [package] + name = "foo" + version = "0.0.1" + authors = [] + "#) + .file("foo/src/lib.rs", "") + .build(); + + assert_that(p.cargo("build"), execs().with_status(0)); + let mut new = p.root(); + new.pop(); + new.push("bar"); + fs::rename(p.root(), &new).unwrap(); + + assert_that(p.cargo("build").cwd(&new), + execs().with_status(0) + .with_stderr("\ +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..] +")); +}