version = "29.0.0"
optional = true
-[dependencies.sha1]
-version = "0.10.0"
-optional = true
-
-[dependencies.sha1_smol]
-version = "1.0.0"
-optional = true
-
[dependencies.thiserror]
version = "2.0.0"
optional = true
cache-efficiency-debug = []
crc32 = ["dep:crc32fast"]
default = []
-fast-sha1 = ["dep:sha1"]
+fast-sha1 = []
fs-read-dir = ["dep:gix-utils"]
fs-walkdir-parallel = [
"dep:jwalk",
progress-unit-bytes = [
"dep:bytesize",
"prodash?/unit-bytes",
+ "gix-hash/progress-unit-bytes",
]
progress-unit-human-numbers = ["prodash?/unit-human"]
-rustsha1 = ["dep:sha1_smol"]
+rustsha1 = []
tracing = ["gix-trace/tracing"]
tracing-detail = ["gix-trace/tracing-detail"]
walkdir = [
"flate2?/zlib",
]
-[target.'cfg(all(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64"), not(target_os = "windows")))'.dependencies.sha1]
-version = "0.10.0"
-features = ["asm"]
-optional = true
-
[target."cfg(unix)".dependencies.libc]
version = "0.2.119"
//! Hash functions and hash utilities
-//!
-//! With the `fast-sha1` feature, the `Sha1` hash type will use a more elaborate implementation utilizing hardware support
-//! in case it is available. Otherwise the `rustsha1` feature should be set. `fast-sha1` will take precedence.
-//! Otherwise, a minimal yet performant implementation is used instead for a decent trade-off between compile times and run-time performance.
-#[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))]
-mod _impl {
- use super::Digest;
-
- /// A implementation of the Sha1 hash, which can be used once.
- #[derive(Default, Clone)]
- pub struct Sha1(sha1_smol::Sha1);
-
- impl Sha1 {
- /// Digest the given `bytes`.
- pub fn update(&mut self, bytes: &[u8]) {
- self.0.update(bytes);
- }
- /// Finalize the hash and produce a digest.
- pub fn digest(self) -> Digest {
- self.0.digest().bytes()
- }
- }
-}
-
-/// A hash-digest produced by a [`Hasher`] hash implementation.
-#[cfg(any(feature = "fast-sha1", feature = "rustsha1"))]
-pub type Digest = [u8; 20];
-
-#[cfg(feature = "fast-sha1")]
-mod _impl {
- use sha1::Digest;
-
- /// A implementation of the Sha1 hash, which can be used once.
- #[derive(Default, Clone)]
- pub struct Sha1(sha1::Sha1);
-
- impl Sha1 {
- /// Digest the given `bytes`.
- pub fn update(&mut self, bytes: &[u8]) {
- self.0.update(bytes);
- }
- /// Finalize the hash and produce a digest.
- pub fn digest(self) -> super::Digest {
- self.0.finalize().into()
- }
- }
-}
+// TODO: Remove this re-export layer (these items now live in `gix-hash::hasher`).
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
-pub use _impl::Sha1 as Hasher;
+pub use gix_hash::hasher::{
+    hasher,
+    io::{bytes, bytes_of_file, bytes_with_hasher, Write},
+    Digest, Hasher,
+};
/// Compute a CRC32 hash from the given `bytes`, returning the CRC32 hash.
///
h.update(bytes);
h.finalize()
}
-
-/// Produce a hasher suitable for the given kind of hash.
-#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
-pub fn hasher(kind: gix_hash::Kind) -> Hasher {
- match kind {
- gix_hash::Kind::Sha1 => Hasher::default(),
- }
-}
-
-/// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start`
-/// while initializing and calling `progress`.
-///
-/// `num_bytes_from_start` is useful to avoid reading trailing hashes, which are never part of the hash itself,
-/// denoting the amount of bytes to hash starting from the beginning of the file.
-///
-/// # Note
-///
-/// * Only available with the `gix-object` feature enabled due to usage of the [`gix_hash::Kind`] enum and the
-/// [`gix_hash::ObjectId`] return value.
-/// * [Interrupts][crate::interrupt] are supported.
-#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
-pub fn bytes_of_file(
- path: &std::path::Path,
- num_bytes_from_start: u64,
- kind: gix_hash::Kind,
- progress: &mut dyn crate::progress::Progress,
- should_interrupt: &std::sync::atomic::AtomicBool,
-) -> std::io::Result<gix_hash::ObjectId> {
- bytes(
- &mut std::fs::File::open(path)?,
- num_bytes_from_start,
- kind,
- progress,
- should_interrupt,
- )
-}
-
-/// Similar to [`bytes_of_file`], but operates on a stream of bytes.
-#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
-pub fn bytes(
- read: &mut dyn std::io::Read,
- num_bytes_from_start: u64,
- kind: gix_hash::Kind,
- progress: &mut dyn crate::progress::Progress,
- should_interrupt: &std::sync::atomic::AtomicBool,
-) -> std::io::Result<gix_hash::ObjectId> {
- bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt)
-}
-
-/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
-#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
-pub fn bytes_with_hasher(
- read: &mut dyn std::io::Read,
- num_bytes_from_start: u64,
- mut hasher: Hasher,
- progress: &mut dyn crate::progress::Progress,
- should_interrupt: &std::sync::atomic::AtomicBool,
-) -> std::io::Result<gix_hash::ObjectId> {
- let start = std::time::Instant::now();
- // init progress before the possibility for failure, as convenience in case people want to recover
- progress.init(
- Some(num_bytes_from_start as prodash::progress::Step),
- crate::progress::bytes(),
- );
-
- const BUF_SIZE: usize = u16::MAX as usize;
- let mut buf = [0u8; BUF_SIZE];
- let mut bytes_left = num_bytes_from_start;
-
- while bytes_left > 0 {
- let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)];
- read.read_exact(out)?;
- bytes_left -= out.len() as u64;
- progress.inc_by(out.len());
- hasher.update(out);
- if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
- return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
- }
- }
-
- let id = gix_hash::ObjectId::from(hasher.digest());
- progress.show_throughput(start);
- Ok(id)
-}
-
-#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
-mod write {
- use crate::hash::Hasher;
-
- /// A utility to automatically generate a hash while writing into an inner writer.
- pub struct Write<T> {
- /// The hash implementation.
- pub hash: Hasher,
- /// The inner writer.
- pub inner: T,
- }
-
- impl<T> std::io::Write for Write<T>
- where
- T: std::io::Write,
- {
- fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
- let written = self.inner.write(buf)?;
- self.hash.update(&buf[..written]);
- Ok(written)
- }
-
- fn flush(&mut self) -> std::io::Result<()> {
- self.inner.flush()
- }
- }
-
- impl<T> Write<T>
- where
- T: std::io::Write,
- {
- /// Create a new hash writer which hashes all bytes written to `inner` with a hash of `kind`.
- pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self {
- match object_hash {
- gix_hash::Kind::Sha1 => Write {
- inner,
- hash: Hasher::default(),
- },
- }
- }
- }
-}
-#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
-pub use write::Write;
[dependencies.faster-hex]
version = "0.9.0"
+[dependencies.prodash]
+version = "29"
+
[dependencies.serde]
version = "1.0.114"
features = ["derive"]
optional = true
default-features = false
+[dependencies.sha1-checked]
+version = "0.10.0"
+default-features = false
+
[dependencies.thiserror]
version = "2.0.0"
[dev-dependencies]
[features]
+progress-unit-bytes = ["prodash/unit-bytes"]
serde = ["dep:serde"]
[lints.clippy]
--- /dev/null
+use crate::{hasher, Hasher};
+
+// Temporary, to avoid a circular dependency on `gix-features`.
+///
+mod gix_features {
+    ///
+    pub mod progress {
+        pub use prodash::{self, unit, Progress, Unit};
+
+        /// A byte unit shown with throughput and percentage, for initializing progress bars.
+        #[cfg(feature = "progress-unit-bytes")]
+        pub fn bytes() -> Option<Unit> {
+            Some(unit::dynamic_and_mode(
+                unit::Bytes,
+                unit::display::Mode::with_throughput().and_percentage(),
+            ))
+        }
+
+        /// Fallback unit labelled plainly as `B` when the `progress-unit-bytes` feature is disabled.
+        #[cfg(not(feature = "progress-unit-bytes"))]
+        pub fn bytes() -> Option<Unit> {
+            Some(unit::label_and_mode(
+                "B",
+                unit::display::Mode::with_throughput().and_percentage(),
+            ))
+        }
+    }
+}
+
+/// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start`
+/// while initializing and calling `progress`.
+///
+/// `num_bytes_from_start` is useful to avoid reading trailing hashes, which are never part of the hash itself,
+/// denoting the amount of bytes to hash starting from the beginning of the file.
+///
+/// # Note
+///
+/// * Interrupts are supported.
+/// * Errors if the file cannot be opened, or if it holds fewer than `num_bytes_from_start` bytes.
+// TODO: Fix link to `gix_features::interrupt`.
+pub fn bytes_of_file(
+    path: &std::path::Path,
+    num_bytes_from_start: u64,
+    kind: crate::Kind,
+    progress: &mut dyn gix_features::progress::Progress,
+    should_interrupt: &std::sync::atomic::AtomicBool,
+) -> std::io::Result<crate::ObjectId> {
+    bytes(
+        &mut std::fs::File::open(path)?,
+        num_bytes_from_start,
+        kind,
+        progress,
+        should_interrupt,
+    )
+}
+
+/// Similar to [`bytes_of_file`], but operates on a stream of bytes.
+///
+/// Converts `kind` into a concrete [`Hasher`] via [`hasher()`] and delegates to [`bytes_with_hasher()`].
+pub fn bytes(
+    read: &mut dyn std::io::Read,
+    num_bytes_from_start: u64,
+    kind: crate::Kind,
+    progress: &mut dyn gix_features::progress::Progress,
+    should_interrupt: &std::sync::atomic::AtomicBool,
+) -> std::io::Result<crate::ObjectId> {
+    bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt)
+}
+
+/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
+pub fn bytes_with_hasher(
+    read: &mut dyn std::io::Read,
+    num_bytes_from_start: u64,
+    mut hasher: Hasher,
+    progress: &mut dyn gix_features::progress::Progress,
+    should_interrupt: &std::sync::atomic::AtomicBool,
+) -> std::io::Result<crate::ObjectId> {
+    let start = std::time::Instant::now();
+    // init progress before the possibility for failure, as convenience in case people want to recover
+    progress.init(
+        Some(num_bytes_from_start as gix_features::progress::prodash::progress::Step),
+        gix_features::progress::bytes(),
+    );
+
+    // Hash in fixed chunks of 64 KiB - 1 bytes to bound memory use regardless of input size.
+    const BUF_SIZE: usize = u16::MAX as usize;
+    let mut buf = [0u8; BUF_SIZE];
+    let mut bytes_left = num_bytes_from_start;
+
+    while bytes_left > 0 {
+        let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)];
+        // `read_exact` errors with `UnexpectedEof` if the stream ends before
+        // `num_bytes_from_start` bytes were consumed.
+        read.read_exact(out)?;
+        bytes_left -= out.len() as u64;
+        progress.inc_by(out.len());
+        hasher.update(out);
+        // Check between chunks so callers can cancel long-running hashing.
+        if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
+            return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
+        }
+    }
+
+    let id = crate::ObjectId::from(hasher.digest());
+    progress.show_throughput(start);
+    Ok(id)
+}
+
+/// A utility to automatically generate a hash while writing into an inner writer.
+pub struct Write<T> {
+    /// The hash implementation.
+    pub hash: Hasher,
+    /// The inner writer.
+    pub inner: T,
+}
+
+impl<T> std::io::Write for Write<T>
+where
+    T: std::io::Write,
+{
+    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        let written = self.inner.write(buf)?;
+        // Only hash the bytes the inner writer actually accepted, which may be
+        // fewer than `buf.len()` on a short write.
+        self.hash.update(&buf[..written]);
+        Ok(written)
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        self.inner.flush()
+    }
+}
+
+impl<T> Write<T>
+where
+    T: std::io::Write,
+{
+    /// Create a new hash writer which hashes all bytes written to `inner` with a hash of `kind`.
+    pub fn new(inner: T, object_hash: crate::Kind) -> Self {
+        match object_hash {
+            crate::Kind::Sha1 => Write {
+                inner,
+                hash: Hasher::default(),
+            },
+        }
+    }
+}
--- /dev/null
+use sha1_checked::CollisionResult;
+
+/// A hash-digest produced by a [`Hasher`] hash implementation, 20 bytes as for SHA-1.
+pub type Digest = [u8; 20];
+
+/// The error returned by [`Hasher::try_finalize()`].
+#[derive(Debug, thiserror::Error)]
+#[allow(missing_docs)]
+pub enum Error {
+    #[error("Detected SHA-1 collision attack with digest {digest}")]
+    CollisionAttack { digest: crate::ObjectId },
+}
+
+/// An implementation of the Sha1 hash, which can be used once.
+///
+/// We use [`sha1_checked`] to implement the same collision detection
+/// algorithm as Git.
+#[derive(Clone)]
+pub struct Hasher(sha1_checked::Sha1);
+
+impl Default for Hasher {
+    #[inline]
+    fn default() -> Self {
+        // This matches the configuration used by Git, which only uses
+        // the collision detection to bail out, rather than computing
+        // alternate “safe hashes” for inputs where a collision attack
+        // was detected.
+        Self(sha1_checked::Builder::default().safe_hash(false).build())
+    }
+}
+
+impl Hasher {
+    /// Digest the given `bytes`.
+    pub fn update(&mut self, bytes: &[u8]) {
+        // Scoped import: only `update()` needs the `Digest` trait in scope.
+        use sha1_checked::Digest;
+        self.0.update(bytes);
+    }
+
+    /// Finalize the hash and produce an object ID.
+    ///
+    /// Returns [`Error`] if a collision attack is detected.
+    #[inline]
+    pub fn try_finalize(self) -> Result<crate::ObjectId, Error> {
+        match self.0.try_finalize() {
+            CollisionResult::Ok(digest) => Ok(crate::ObjectId::Sha1(digest.into())),
+            CollisionResult::Mitigated(_) => {
+                // SAFETY: `CollisionResult::Mitigated` is only
+                // returned when `safe_hash()` is on. `Hasher`’s field
+                // is private, and we only construct it in the
+                // `Default` instance, which turns `safe_hash()` off.
+                //
+                // As of Rust 1.84.1, the compiler can’t figure out
+                // this function cannot panic without this.
+                #[allow(unsafe_code)]
+                unsafe {
+                    std::hint::unreachable_unchecked()
+                }
+            }
+            CollisionResult::Collision(digest) => Err(Error::CollisionAttack {
+                digest: crate::ObjectId::Sha1(digest.into()),
+            }),
+        }
+    }
+
+    /// Finalize the hash and produce an object ID.
+    ///
+    /// Panics if a collision attack is detected; use [`Hasher::try_finalize()`] to handle that case as an error.
+    #[inline]
+    pub fn finalize(self) -> crate::ObjectId {
+        self.try_finalize().expect("Detected SHA-1 collision attack")
+    }
+
+    /// Finalize the hash and produce a digest.
+    ///
+    /// Panics if a collision attack is detected, as it delegates to [`Hasher::finalize()`].
+    #[inline]
+    pub fn digest(self) -> Digest {
+        self.finalize()
+            .as_slice()
+            .try_into()
+            .expect("SHA-1 object ID to be 20 bytes long")
+    }
+}
+
+/// Produce a hasher suitable for the given kind of hash.
+#[inline]
+pub fn hasher(kind: crate::Kind) -> Hasher {
+    // Exhaustive match: adding a new hash kind forces this function to be updated.
+    match kind {
+        crate::Kind::Sha1 => Hasher::default(),
+    }
+}
+
+/// Hashing utilities for I/O operations.
+pub mod io;
mod borrowed;
pub use borrowed::{oid, Error};
+/// Hash functions and hash utilities
+pub mod hasher;
+pub use hasher::io::{bytes, bytes_of_file, bytes_with_hasher};
+pub use hasher::{hasher, Hasher};
+
mod object_id;
pub use object_id::{decode, ObjectId};