|
mod default;
| 2 | + |
/// This trait allows the user to customize the heuristic used to determine the
/// relative frequency of a given byte in the dataset being searched.
///
/// The use of this trait can have a dramatic impact on performance depending
/// on the type of data being searched. The details of why are explained in the
/// docs of [`prefilter::Prefilter`]. To summarize, the core algorithm uses a
/// prefilter to quickly identify candidate matches that are later verified
/// more slowly. This prefilter is implemented in terms of trying to find
/// `rare` bytes at specific offsets that will occur less frequently in the
/// dataset. While the concept of a `rare` byte is similar for most datasets,
/// there are some specific datasets (like binary executables) that have
/// dramatically different byte distributions. For these datasets, customizing
/// the byte frequency heuristic can have a massive impact on performance, and
/// might even need to be done at runtime.
///
/// The default implementation of `HeuristicFrequencyRank` reads from the
/// static frequency table `RANK` defined in this module's private `default`
/// submodule. This is optimal for most inputs, so if you are unsure of the
/// impact of using a custom `HeuristicFrequencyRank` you should probably just
/// use the default.
///
/// # Example
///
/// ```
/// use memchr::memmem::{FinderBuilder, HeuristicFrequencyRank};
///
/// /// A byte-frequency table that is good for scanning binary executables.
/// struct Binary;
///
/// impl HeuristicFrequencyRank for Binary {
///     fn rank(&self, byte: u8) -> u8 {
///         const TABLE: [u8; 256] = [
///             255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17,
///             89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16,
///             68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11,
///             9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8,
///             48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27,
///             4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19,
///             19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24,
///             1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5,
///             51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15,
///             0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0,
///             12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0,
///             2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5,
///             46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13,
///             3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2,
///             16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5,
///             8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175,
///         ];
///         TABLE[byte as usize]
///     }
/// }
///
/// // Create a new finder with the custom heuristic.
/// let finder = FinderBuilder::new()
///     .build_forward_with_ranker(Binary, b"\x00\x00\xdd\xdd");
/// // Find needle with custom heuristic. The needle starts at offset 1.
/// assert_eq!(Some(1), finder.find(b"\x00\x00\x00\xdd\xdd"));
/// ```
pub trait HeuristicFrequencyRank {
    /// Return the heuristic frequency rank of the given byte. A lower rank
    /// means the byte is believed to occur less frequently in the haystack.
    ///
    /// Some uses of this heuristic may treat arbitrary absolute rank values as
    /// significant. For example, an implementation detail in this crate may
    /// determine that heuristic prefilters are inappropriate if every byte in
    /// the needle has a "high" rank.
    fn rank(&self, byte: u8) -> u8;
}
| 70 | + |
/// The default byte frequency heuristic that is good for most haystacks.
///
/// This is a field-less unit type: it carries no state of its own and exists
/// only to select the built-in ranking through its `HeuristicFrequencyRank`
/// implementation.
#[derive(Clone, Copy, Debug)]
pub(crate) struct DefaultFrequencyRank;
| 73 | + |
| 74 | +impl HeuristicFrequencyRank for DefaultFrequencyRank { |
| 75 | + fn rank(&self, byte: u8) -> u8 { |
| 76 | + self::default::RANK[usize::from(byte)] |
| 77 | + } |
| 78 | +} |
| 79 | + |
| 80 | +/// This permits passing any implementation of `HeuristicFrequencyRank` as a |
| 81 | +/// borrowed version of itself. |
| 82 | +impl<'a, R> HeuristicFrequencyRank for &'a R |
| 83 | +where |
| 84 | + R: HeuristicFrequencyRank, |
| 85 | +{ |
| 86 | + fn rank(&self, byte: u8) -> u8 { |
| 87 | + (**self).rank(byte) |
| 88 | + } |
| 89 | +} |
0 commit comments