Skip to content

Commit

Permalink
feat: Only save md5 in lock file if no sha256 is present (#764)
Browse files Browse the repository at this point in the history
By saving only one of those two hashes, we reduce the lock file size a
bit. This should be especially noticeable in git repos, since hashes
compress poorly.

This PR is a contribution to improving
prefix-dev/pixi#1509
  • Loading branch information
Hofer-Julian authored Jul 5, 2024
1 parent 116087b commit 4ee9217
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 3,363 deletions.
34 changes: 11 additions & 23 deletions crates/rattler_lock/src/hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,15 @@ use serde_with::skip_serializing_none;
/// This implementation of the `Deserialize` trait for the `PackageHashes` struct
///
/// It expects the input to have either a `md5` field, a `sha256` field, or both.
/// If both fields are present, it constructs a `Md5Sha256` instance with their values.
/// If the `sha256` field is present, it constructs a `Sha256` instance with its value.
/// If only the `md5` field is present, it constructs a `Md5` instance with its value.
/// If only the `sha256` field is present, it constructs a `Sha256` instance with its value.
/// If neither field is present it returns an error
#[derive(Eq, PartialEq, Hash, Clone, Debug)]
pub enum PackageHashes {
/// Contains an MD5 hash
Md5(Md5Hash),
/// Contains as Sha256 Hash
Sha256(Sha256Hash),
/// Contains both hashes
Md5Sha256(Md5Hash, Sha256Hash),
}

impl PartialOrd for PackageHashes {
Expand All @@ -36,11 +33,10 @@ impl Ord for PackageHashes {
impl PackageHashes {
/// Create correct enum from hashes
pub fn from_hashes(md5: Option<Md5Hash>, sha256: Option<Sha256Hash>) -> Option<PackageHashes> {
use PackageHashes::{Md5, Md5Sha256, Sha256};
use PackageHashes::{Md5, Sha256};
match (md5, sha256) {
(_, Some(sha256)) => Some(Sha256(sha256)),
(Some(md5), None) => Some(Md5(md5)),
(None, Some(sha256)) => Some(Sha256(sha256)),
(Some(md5), Some(sha256)) => Some(Md5Sha256(md5, sha256)),
(None, None) => None,
}
}
Expand All @@ -49,15 +45,15 @@ impl PackageHashes {
pub fn sha256(&self) -> Option<&Sha256Hash> {
match self {
PackageHashes::Md5(_) => None,
PackageHashes::Sha256(sha256) | PackageHashes::Md5Sha256(_, sha256) => Some(sha256),
PackageHashes::Sha256(sha256) => Some(sha256),
}
}

/// Returns the MD5 hash
pub fn md5(&self) -> Option<&Md5Hash> {
match self {
PackageHashes::Sha256(_) => None,
PackageHashes::Md5(md5) | PackageHashes::Md5Sha256(md5, _) => Some(md5),
PackageHashes::Md5(md5) => Some(md5),
}
}

Expand All @@ -66,7 +62,6 @@ impl PackageHashes {
match self {
PackageHashes::Sha256(sha256) => sha256.to_vec(),
PackageHashes::Md5(md5) => md5.to_vec(),
PackageHashes::Md5Sha256(md5, sha256) => [md5.to_vec(), sha256.to_vec()].concat(),
}
}
}
Expand All @@ -83,7 +78,7 @@ impl Serialize for PackageHashes {
where
S: Serializer,
{
use PackageHashes::{Md5, Md5Sha256, Sha256};
use PackageHashes::{Md5, Sha256};
let raw = match self {
Md5(hash) => RawPackageHashes {
md5: Some(SerializableHash::from(*hash)),
Expand All @@ -93,10 +88,6 @@ impl Serialize for PackageHashes {
md5: None,
sha256: Some(SerializableHash::from(*hash)),
},
Md5Sha256(md5hash, sha) => RawPackageHashes {
md5: Some(SerializableHash::from(*md5hash)),
sha256: Some(SerializableHash::from(*sha)),
},
};
raw.serialize(serializer)
}
Expand All @@ -105,24 +96,22 @@ impl Serialize for PackageHashes {
// This implementation of the `Deserialize` trait for the `PackageHashes` struct
//
// It expects the input to have either a `md5` field, a `sha256` field, or both.
// If both fields are present, it constructs a `Md5Sha256` instance with their values.
// If the `sha256` field is present, it constructs a `Sha256` instance with its value.
// If only the `md5` field is present, it constructs a `Md5` instance with its value.
// If only the `sha256` field is present, it constructs a `Sha256` instance with its value.
// If neither field is present it returns an error
impl<'de> Deserialize<'de> for PackageHashes {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
use PackageHashes::{Md5, Md5Sha256, Sha256};
use PackageHashes::{Md5, Sha256};
let temp = RawPackageHashes::deserialize(deserializer)?;
Ok(match (temp.md5, temp.sha256) {
(Some(md5), Some(sha)) => Md5Sha256(md5.into(), sha.into()),
(_, Some(sha)) => Sha256(sha.into()),
(Some(md5), None) => Md5(md5.into()),
(None, Some(sha)) => Sha256(sha.into()),
_ => {
return Err(D::Error::custom(
"Expected `sha256` field `md5` field or both",
"Expected `sha256` field, `md5` field or both",
))
}
})
Expand All @@ -140,9 +129,8 @@ mod test {
md5: 4eccaeba205f0aed9ac3a9ea58568ca3
sha256: f240217476e148e825420c6bc3a0c0efb08c0718b7042fae960400c02af858a3
"#;

let result: PackageHashes = from_str(yaml).unwrap();
assert!(matches!(result, PackageHashes::Md5Sha256(_, _)));
assert!(matches!(result, PackageHashes::Sha256(_)));

let yaml = r#"
md5: 4eccaeba205f0aed9ac3a9ea58568ca3
Expand Down
6 changes: 2 additions & 4 deletions crates/rattler_lock/src/parse/v3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,11 @@ pub fn parse_v3_or_lower(
let pkg: EnvironmentPackageData = match kind {
LockedPackageKindV3::Conda(value) => {
let md5 = match value.hash {
PackageHashes::Md5(md5) | PackageHashes::Md5Sha256(md5, _) => Some(md5),
PackageHashes::Md5(md5) => Some(md5),
PackageHashes::Sha256(_) => None,
};
let sha256 = match value.hash {
PackageHashes::Sha256(sha256) | PackageHashes::Md5Sha256(_, sha256) => {
Some(sha256)
}
PackageHashes::Sha256(sha256) => Some(sha256),
PackageHashes::Md5(_) => None,
};

Expand Down
Loading

0 comments on commit 4ee9217

Please sign in to comment.