Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 134 additions & 1 deletion src/utils/multihash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ pub mod prelude {
pub use multihash_codetable::MultihashDigest as _;
}

use multihash_derive::MultihashDigest;
use multihash_derive::{Hasher, MultihashDigest};

/// Extends [`multihash_codetable::Code`] with `Identity`
#[derive(Clone, Copy, Debug, Eq, MultihashDigest, PartialEq)]
Expand Down Expand Up @@ -76,6 +76,106 @@ pub enum MultihashCode {
Ripemd320,
}

impl MultihashCode {
/// Calculate the [`Multihash`] of the input byte stream.
pub fn digest_byte_stream<R: std::io::Read>(&self, bytes: &mut R) -> anyhow::Result<Multihash> {
fn hash<'a, H: Hasher, R: std::io::Read>(
hasher: &'a mut H,
bytes: &'a mut R,
) -> anyhow::Result<&'a [u8]> {
let mut buf = [0; 1024];
loop {
let n = bytes.read(&mut buf)?;
if n == 0 {
break;
}
if let Some(b) = buf.get(0..n) {
hasher.update(b);
}
}
Ok(hasher.finalize())
Comment on lines +86 to +96
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let mut buf = [0; 1024];
loop {
let n = bytes.read(&mut buf)?;
if n == 0 {
break;
}
if let Some(b) = buf.get(0..n) {
hasher.update(b);
}
}
Ok(hasher.finalize())
while let Some(n) = bytes.read(&mut buf).ok() {
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(hasher.finalize())

nit: some code golf if you want

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the io::Error from .read should be escalated here.
Then I get

warning: irrefutable `while let` pattern
  --> src/utils/multihash.rs:96:19
   |
96 |             while let n = bytes.read(&mut buf)? {
   |                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   |
   = note: this pattern will always match, so the loop will never exit
   = help: consider instead using a `loop { ... }` with a `let` inside it
   = note: `#[warn(irrefutable_let_patterns)]` on by default
...
error: slicing may panic
   --> src/utils/multihash.rs:100:32
    |
100 |                 hasher.update(&buf[..n]);
    |                                ^^^^^^^^
    |
    = help: consider using `.get(..n)`or `.get_mut(..n)` instead
    = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#indexing_slicing

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could be a while loop like below but I'm not sure if it's better than a loop loop, what do you think?

let mut n = 0;
            while {
                n = bytes.read(&mut buf)?;
                n > 0
            } {
                if let Some(b) = buf.get(0..n) {
                    hasher.update(b);
                }
            }

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's leave the original version, then.

}

Ok(match self {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: do you think it's a good opportunity for a macro to avoid code duplication?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think a macro would significantly reduce the LoC, but it would hurt readability. What do you think?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No strong position here, I don't expect this code to be updated with many new algorithms anytime soon in the future. We can leave it as is.

Self::Sha2_256 => {
let mut hasher = multihash_codetable::Sha2_256::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Sha2_512 => {
let mut hasher = multihash_codetable::Sha2_512::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Sha3_224 => {
let mut hasher = multihash_codetable::Sha3_224::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Sha3_256 => {
let mut hasher = multihash_codetable::Sha3_256::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Sha3_384 => {
let mut hasher = multihash_codetable::Sha3_384::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Sha3_512 => {
let mut hasher = multihash_codetable::Sha3_512::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Keccak224 => {
let mut hasher = multihash_codetable::Keccak224::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Keccak256 => {
let mut hasher = multihash_codetable::Keccak256::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Keccak384 => {
let mut hasher = multihash_codetable::Keccak384::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Keccak512 => {
let mut hasher = multihash_codetable::Keccak512::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Blake2b256 => {
let mut hasher = multihash_codetable::Blake2b256::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Blake2b512 => {
let mut hasher = multihash_codetable::Blake2b512::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Blake2s128 => {
let mut hasher = multihash_codetable::Blake2s128::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Blake2s256 => {
let mut hasher = multihash_codetable::Blake2s256::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Blake3_256 => {
let mut hasher = multihash_codetable::Blake3_256::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Ripemd160 => {
let mut hasher = multihash_codetable::Ripemd160::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Ripemd256 => {
let mut hasher = multihash_codetable::Ripemd256::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
Self::Ripemd320 => {
let mut hasher = multihash_codetable::Ripemd320::default();
self.wrap(hash(&mut hasher, bytes)?)?
}
_ => {
anyhow::bail!("`digest_byte_stream` is unimplemented for {self:?}");
}
})
}
}

/// Identity hasher with a maximum size.
///
/// # Panics
Expand Down Expand Up @@ -113,3 +213,36 @@ impl<const S: usize> multihash_derive::Hasher for IdentityHasher<S> {
self.i = 0
}
}

#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;
    use crate::utils::rand::forest_rng;
    use rand::RngCore as _;

    /// The streaming digest must agree with the one-shot `digest` for every
    /// supported code, and fail for codes without a streaming hasher.
    #[test]
    fn test_digest_byte_stream() {
        use MultihashCode::*;

        // Cover empty input, a single byte, a sub-buffer size, the exact
        // internal buffer size (1024), and a multi-buffer size, so the
        // chunked read loop's boundaries are all exercised.
        for len in [0, 1, 100, 1024, 10000] {
            let mut bytes = vec![0; len];
            forest_rng().fill_bytes(&mut bytes);
            // Borrow the data rather than cloning it: `Cursor<&[u8]>`
            // implements `Read` just as well as `Cursor<Vec<u8>>`.
            let mut cursor = Cursor::new(bytes.as_slice());
            for code in [
                Sha2_256, Sha2_512, Sha3_224, Sha3_256, Sha3_384, Sha3_512, Keccak224, Keccak256,
                Keccak384, Keccak512, Blake2b256, Blake2b512, Blake2s128, Blake2s256, Blake3_256,
                Ripemd160, Ripemd256, Ripemd320,
            ] {
                // Rewind so every code hashes the full stream.
                cursor.set_position(0);
                let mh1 = code.digest(&bytes);
                let mh2 = code.digest_byte_stream(&mut cursor).unwrap();
                assert_eq!(mh1, mh2);
            }

            cursor.set_position(0);
            // `Identity` has no streaming implementation and must error.
            Identity.digest_byte_stream(&mut cursor).unwrap_err();
        }
    }
}
Loading