Skip to content

Commit

Permalink
Merge branch 'fix-gix-index-from-tree'
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Oct 12, 2022
2 parents 6be3207 + c8d0345 commit da5f63c
Show file tree
Hide file tree
Showing 28 changed files with 235 additions and 104 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions git-index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ git-hash = { version = "^0.9.11", path = "../git-hash" }
git-bitmap = { version = "^0.1.2", path = "../git-bitmap" }
git-object = { version = "^0.22.0", path = "../git-object" }
git-traverse = { version = "^0.18.0", path = "../git-traverse" }
git-lock = { version = "2.1.1", path = "../git-lock" }

thiserror = "1.0.32"
memmap2 = "0.5.0"
Expand Down
5 changes: 5 additions & 0 deletions git-index/src/access.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ impl State {
self.version
}

/// Return the kind of hashes used in this instance.
///
/// This is a plain copy of the `object_hash` field of this state.
pub fn object_hash(&self) -> git_hash::Kind {
self.object_hash
}

/// Return our entries
pub fn entries(&self) -> &[Entry] {
&self.entries
Expand Down
10 changes: 5 additions & 5 deletions git-index/src/decode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,8 @@ use git_features::parallel::InOrderIter;
use crate::util::read_u32;

/// Options to define how to decode an index state [from bytes][State::from_bytes()].
#[derive(Default)]
#[derive(Default, Clone, Copy)]
pub struct Options {
/// The kind of object hash to assume when decoding object ids.
pub object_hash: git_hash::Kind,
/// If Some(_), we are allowed to use more than one thread. If Some(N), use no more than N threads. If Some(0)|None, use as many threads
/// as there are logical cores.
///
Expand All @@ -46,12 +44,13 @@ pub struct Options {
}

impl State {
/// Decode an index state from `data` and store `timestamp` in the resulting instance for pass-through.
/// Decode an index state from `data` and store `timestamp` in the resulting instance for pass-through, assuming `object_hash`
/// to be used throughout the file.
pub fn from_bytes(
data: &[u8],
timestamp: FileTime,
object_hash: git_hash::Kind,
Options {
object_hash,
thread_limit,
min_extension_block_in_bytes_for_threading,
}: Options,
Expand Down Expand Up @@ -210,6 +209,7 @@ impl State {

Ok((
State {
object_hash,
timestamp,
version,
entries,
Expand Down
23 changes: 19 additions & 4 deletions git-index/src/file/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ pub use error::Error;

/// Initialization
impl File {
/// Open an index file at `path` with `options`.
pub fn at(path: impl Into<PathBuf>, options: decode::Options) -> Result<Self, Error> {
/// Open an index file at `path` with `options`, assuming `object_hash` is used throughout the file.
pub fn at(path: impl Into<PathBuf>, object_hash: git_hash::Kind, options: decode::Options) -> Result<Self, Error> {
let path = path.into();
let (data, mtime) = {
// SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
Expand All @@ -34,7 +34,22 @@ impl File {
(data, filetime::FileTime::from_last_modification_time(&file.metadata()?))
};

let (state, checksum) = State::from_bytes(&data, mtime, options)?;
Ok(File { state, path, checksum })
let (state, checksum) = State::from_bytes(&data, mtime, object_hash, options)?;
Ok(File {
state,
path,
checksum: Some(checksum),
})
}

/// Consume `state` and pretend it was read from `path`, setting our checksum to `None`.
///
/// `File` instances created like that should be written to disk to set the correct checksum via [`File::write()`].
pub fn from_state(state: crate::State, path: impl Into<PathBuf>) -> Self {
File {
state,
path: path.into(),
checksum: None,
}
}
}
33 changes: 33 additions & 0 deletions git-index/src/file/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,39 @@ mod impl_ {
}
}
}

mod access {
    use crate::File;

    /// Consumption
    impl File {
        /// Give up the file and keep only the index state it carried.
        pub fn into_state(self) -> crate::State {
            let File { state, .. } = self;
            state
        }

        /// Disassemble the file into its state and its path, dropping the checksum.
        pub fn into_parts(self) -> (crate::State, std::path::PathBuf) {
            let File { state, path, .. } = self;
            (state, path)
        }
    }

    /// Access
    impl File {
        /// The location this index was read from, or the location it is meant to be written to if it was
        /// created with [`File::from_state()`].
        pub fn path(&self) -> &std::path::Path {
            self.path.as_path()
        }

        /// The checksum of the file as last read from or written to disk, or `None` if the in-memory state
        /// was never serialized.
        ///
        /// A `Some` value only describes the state as it was right after reading or [writing][File::write()].
        pub fn checksum(&self) -> Option<git_hash::ObjectId> {
            self.checksum
        }
    }
}

///
pub mod init;
///
Expand Down
11 changes: 7 additions & 4 deletions git-index/src/file/verify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,28 @@ mod error {
actual: git_hash::ObjectId,
expected: git_hash::ObjectId,
},
#[error("Checksum of in-memory index wasn't computed yet")]
NoChecksum,
}
}
pub use error::Error;

impl File {
/// Verify the integrity of the index to assure its consistency.
pub fn verify_integrity(&self) -> Result<(), Error> {
let num_bytes_to_hash = self.path.metadata()?.len() - self.checksum.as_bytes().len() as u64;
let checksum = self.checksum.ok_or(Error::NoChecksum)?;
let num_bytes_to_hash = self.path.metadata()?.len() - checksum.as_bytes().len() as u64;
let should_interrupt = AtomicBool::new(false);
let actual = git_features::hash::bytes_of_file(
&self.path,
num_bytes_to_hash as usize,
self.checksum.kind(),
checksum.kind(),
&mut git_features::progress::Discard,
&should_interrupt,
)?;
(actual == self.checksum).then(|| ()).ok_or(Error::ChecksumMismatch {
(actual == checksum).then(|| ()).ok_or(Error::ChecksumMismatch {
actual,
expected: self.checksum,
expected: checksum,
})
}
}
41 changes: 38 additions & 3 deletions git-index/src/file/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,50 @@ use git_features::hash;

use crate::{write, File, Version};

/// The error produced by [`File::write()`].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
/// An I/O error, passed through unchanged.
#[error(transparent)]
Io(#[from] std::io::Error),
/// The lock guarding the index file could not be obtained.
#[error("Could not acquire lock for index file")]
AcquireLock(#[from] git_lock::acquire::Error),
/// The lock was obtained, but committing it failed.
#[error("Could not commit lock for index file")]
CommitLock(#[from] git_lock::commit::Error<git_lock::File>),
}

impl File {
/// Write the index to `out` with `options`, to be readable by [`File::at()`], returning the version that was actually written
/// to retain all information of this index, along with the checksum that was appended to `out` after all other data.
pub fn write_to(&self, mut out: impl std::io::Write, options: write::Options) -> std::io::Result<Version> {
let mut hasher = hash::Write::new(&mut out, options.hash_kind);
pub fn write_to(
&self,
mut out: impl std::io::Write,
options: write::Options,
) -> std::io::Result<(Version, git_hash::ObjectId)> {
let mut hasher = hash::Write::new(&mut out, self.state.object_hash);
let version = self.state.write_to(&mut hasher, options)?;

let hash = hasher.hash.digest();
out.write_all(&hash)?;
Ok(version)
Ok((version, git_hash::ObjectId::from(hash)))
}

/// Lock our path, serialize the index through that lock using `options`, and commit the lock on success.
///
/// This needs `&mut self` because the version that was written and the resulting checksum are stored
/// in this instance afterwards.
pub fn write(&mut self, options: write::Options) -> Result<(), Error> {
    let lock = git_lock::File::acquire_to_update_resource(&self.path, git_lock::acquire::Fail::Immediately, None)?;
    let mut buffered = std::io::BufWriter::new(lock);
    let (version, digest) = self.write_to(&mut buffered, options)?;

    // Taking the lock back out of the buffer flushes it; a failure here is reported as a plain I/O error.
    let lock = buffered.into_inner().map_err(|err| err.into_error())?;
    lock.commit()?;

    // Only update our own bookkeeping once the data was committed.
    self.state.version = version;
    self.checksum = Some(digest);
    Ok(())
}
}
1 change: 1 addition & 0 deletions git-index/src/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ mod from_tree {
entries.sort_by(|a, b| Entry::cmp_filepaths(a.path_in(&path_backing), b.path_in(&path_backing)));

Ok(State {
object_hash: tree.kind(),
timestamp: filetime::FileTime::now(),
version: Version::V2,
entries,
Expand Down
10 changes: 7 additions & 3 deletions git-index/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@ pub struct Entry {
/// An index file whose state was read from a file on disk.
pub struct File {
/// The state containing the actual index data.
pub state: State,
pub(crate) state: State,
/// The path from which the index was read or to which it is supposed to be written.
pub path: PathBuf,
pub(crate) path: PathBuf,
/// The checksum of all bytes prior to the checksum itself.
pub checksum: git_hash::ObjectId,
pub(crate) checksum: Option<git_hash::ObjectId>,
}

/// The type to use and store paths to all entries.
Expand All @@ -81,6 +81,10 @@ pub type PathStorageRef = [u8];
/// We treat the index and its state as synonymous.
#[derive(Clone)]
pub struct State {
/// The kind of object hash used when storing the underlying file.
///
/// Empty states for example won't have a single object id, so deduction of the hash used isn't always possible.
object_hash: git_hash::Kind,
/// The time at which the state was created, indicating its freshness compared to other files on disk.
///
/// Note that on platforms that only have a precision of a second for this time, we will treat all entries with the
Expand Down
13 changes: 2 additions & 11 deletions git-index/src/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,22 +48,13 @@ impl Extensions {
/// Note that default options write either index V2 or V3 depending on the content of the entries.
#[derive(Debug, Default, Clone, Copy)]
pub struct Options {
/// The hash kind to use when writing the index file.
///
/// It is not always possible to infer the hash kind when reading an index, so this is required.
pub hash_kind: git_hash::Kind,

/// Configures which extensions to write
pub extensions: Extensions,
}

impl State {
/// Serialize this instance to `out` with [`options`][Options].
pub fn write_to(
&self,
out: impl std::io::Write,
Options { hash_kind, extensions }: Options,
) -> std::io::Result<Version> {
pub fn write_to(&self, out: impl std::io::Write, Options { extensions }: Options) -> std::io::Result<Version> {
let version = self.detect_required_version();

let mut write = CountBytes::new(out);
Expand All @@ -83,7 +74,7 @@ impl State {
.is_some()
&& !extension_toc.is_empty()
{
extension::end_of_index_entry::write_to(out, hash_kind, offset_to_extensions, extension_toc)?
extension::end_of_index_entry::write_to(out, self.object_hash, offset_to_extensions, extension_toc)?
}

Ok(version)
Expand Down
35 changes: 35 additions & 0 deletions git-index/tests/index/file/init.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
mod from_state {
    use crate::index::Fixture::*;
    use git_index::Version::{V2, V3};

    /// Reads each fixture, re-homes its state to a fresh path via `File::from_state()`, writes it there
    /// and checks that the result has a checksum, the expected version and passes the integrity check.
    #[test]
    fn writes_data_to_disk_and_is_a_valid_index() -> git_testtools::Result {
        for (fixture, expected_version) in [
            (Loose("extended-flags"), V3),
            (Generated("v2"), V2),
            (Generated("V2_empty"), V2),
            (Generated("v2_more_files"), V2),
            (Generated("v2_all_file_kinds"), V2),
            (Generated("v4_more_files_IEOT"), V2),
        ] {
            let tmp = git_testtools::tempfile::TempDir::new()?;
            let target = tmp.path().join(fixture.to_name());
            assert!(!target.exists());

            // Only the state of the fixture is carried over, the new file has never been written.
            let source = git_index::File::at(fixture.to_path(), git_hash::Kind::Sha1, Default::default())?;
            let state = source.into_state();
            let mut index = git_index::File::from_state(state, target.clone());
            assert!(index.checksum().is_none());
            assert_eq!(index.path(), target);

            index.write(git_index::write::Options::default())?;
            assert!(index.checksum().is_some(), "checksum is adjusted after writing");
            assert!(index.path().is_file());
            assert_eq!(index.version(), expected_version);

            index.verify_integrity()?;
        }
        Ok(())
    }
}
1 change: 1 addition & 0 deletions git-index/tests/index/file/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod access;
mod init;
mod read;
mod write;
Loading

0 comments on commit da5f63c

Please sign in to comment.