@@ -479,31 +479,62 @@ pub trait Hasher {
    ///
    /// # Note to Implementers
    ///
    /// There are at least two reasonable default ways to implement this.
    /// Which one will be the default is not yet decided, so for now
    /// you probably want to override it specifically.
    ///
    /// ## The general answer
    ///
    /// It's always correct to implement this with a length prefix:
    ///
    /// ```
    /// # #![feature(hasher_prefixfree_extras)]
    /// # struct Foo;
    /// # impl std::hash::Hasher for Foo {
    /// # fn finish(&self) -> u64 { unimplemented!() }
    /// # fn write(&mut self, _bytes: &[u8]) { unimplemented!() }
    /// fn write_str(&mut self, s: &str) {
    ///     self.write_length_prefix(s.len());
    ///     self.write(s.as_bytes());
    /// }
    /// # }
    /// ```
    ///
    /// And, if your `Hasher` works in `usize` chunks, this is likely a very
    /// efficient way to do it, as anything more complicated may well end up
    /// slower than just running the round with the length.
    ///
    /// ## If your `Hasher` works byte-wise
    ///
    /// One nice thing about `str` being UTF-8 is that the `b'\xFF'` byte
    /// never happens.  That means that you can append that to the byte stream
    /// being hashed and maintain prefix-freedom:
    ///
    /// ```
    /// # #![feature(hasher_prefixfree_extras)]
    /// # struct Foo;
    /// # impl std::hash::Hasher for Foo {
    /// # fn finish(&self) -> u64 { unimplemented!() }
    /// # fn write(&mut self, _bytes: &[u8]) { unimplemented!() }
    /// fn write_str(&mut self, s: &str) {
    ///     self.write(s.as_bytes());
    ///     self.write_u8(0xff);
    /// }
    /// # }
    /// ```
    ///
    /// This does require that your implementation not add extra padding, and
    /// thus generally requires that you maintain a buffer, running a round
    /// only once that buffer is full (or `finish` is called).
    ///
    /// That's because if `write` pads data out to a fixed chunk size, it's
    /// likely that it does it in such a way that `"a"` and `"a\x00"` would
    /// end up hashing the same sequence of things, introducing conflicts.
    #[inline]
    #[unstable(feature = "hasher_prefixfree_extras", issue = "96762")]
    fn write_str(&mut self, s: &str) {
        // Default: rely on `str` never containing a `0xFF` byte (UTF-8), so a
        // single trailing `0xFF` terminator provides prefix-freedom without
        // hashing a length. NOTE(review): this is only prefix-free if
        // `write`/`write_u8` don't pad chunks in a way that collapses
        // `"a"` and `"a\x00"` — see the implementer notes above.
        self.write(s.as_bytes());
        self.write_u8(0xff);
    }
}
509540
0 commit comments