Skip to content

Commit e49a1b8

Browse files
sentrip authored and BurntSushi committed
memmem: add new HeuristicFrequencyRank
This makes it possible for the caller to provide their own ranking function for individual bytes. This can potentially speed up searches if one has a better guess than the default for the frequency distribution of bytes in a particular haystack. There is a lot of ceremony here, and it basically boils down to supporting this in no-std no-alloc configurations. I was tempted to just require alloc for this sort of thing and ask for something like `Arc<dyn Fn(u8) -> u8>`, but that would require some ceremony of its own internally to deal with in the no-alloc case. And forcing an allocation for every searcher construction that uses a custom ranker feels like bad juju to me. Another choice would be to just ask for a `fn(u8) -> u8`, but this makes the case of "I analyzed a haystack at runtime to build my ranker" more difficult. Not impossible. But annoying. Yet another choice was to add the trait as in this commit, and then add it as a new type parameter to `FinderBuilder`. I believe this would work, but it requires complicating the public API even more and imposes constraints on the trait (for example, it would want to be `Clone` at least in order to avoid backwards incompatible changes in the `FinderBuilder` API). There's also just generally more ceremony with having to add a type parameter everywhere. Since we only need the ranking function at searcher construction time, we can ask for it at the time of construction and then get rid of it, thus avoiding it infecting everything else. Fixes #117, Closes #118, Closes #119
1 parent be564d4 commit e49a1b8

File tree

8 files changed

+261
-30
lines changed

8 files changed

+261
-30
lines changed

bench/src/memmem/byterank.rs

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
use criterion::Criterion;
2+
use memchr::memmem::HeuristicFrequencyRank;
3+
4+
use crate::define;
5+
6+
pub(crate) fn all(c: &mut Criterion) {
7+
finder_construction(c);
8+
byte_frequencies(c);
9+
}
10+
11+
fn finder_construction(c: &mut Criterion) {
12+
// This benchmark is purely for measuring the time taken to create a
13+
// `Finder`. It is here to prevent regressions when adding new features
14+
// to the `Finder`, such as the ability to construct with a custom
15+
// `HeuristicFrequencyRank`.
16+
const NEEDLES: [&str; 3] = ["a", "abcd", "abcdefgh12345678"];
17+
18+
for needle in NEEDLES {
19+
define(
20+
c,
21+
&format!(
22+
"memmem/krate/bytefreq/construct-finder/default(len={})",
23+
needle.len()
24+
),
25+
needle.as_bytes(),
26+
Box::new(move |b| {
27+
b.iter(|| {
28+
memchr::memmem::FinderBuilder::new()
29+
.build_forward(needle.as_bytes())
30+
});
31+
}),
32+
);
33+
define(
34+
c,
35+
&format!(
36+
"memmem/krate/bytefreq/construct-finder/custom(len={})",
37+
needle.len()
38+
),
39+
needle.as_bytes(),
40+
Box::new(move |b| {
41+
b.iter(|| {
42+
memchr::memmem::FinderBuilder::new()
43+
.build_forward_with_ranker(Binary, needle.as_bytes())
44+
});
45+
}),
46+
);
47+
}
48+
}
49+
50+
fn byte_frequencies(c: &mut Criterion) {
51+
// This benchmark exists to demonstrate a common use case for
52+
// customizing the byte frequency table used by a `Finder`
53+
// and the relative performance gain from using an optimal table.
54+
// This is essentially why `HeuristicFrequencyRank` was added.
55+
56+
// Bytes we want to scan for that are rare in strings but common in
57+
// executables.
58+
const NEEDLE: &[u8] = b"\x00\x00\xdd\xdd'";
59+
60+
// The input for the benchmark is the benchmark binary itself
61+
let exe = std::env::args().next().unwrap();
62+
let corpus = std::fs::read(exe).unwrap();
63+
64+
let bin = corpus.clone();
65+
define(
66+
c,
67+
&format!("memmem/krate/bytefreq/default"),
68+
&corpus,
69+
Box::new(move |b| {
70+
let finder =
71+
memchr::memmem::FinderBuilder::new().build_forward(NEEDLE);
72+
b.iter(|| {
73+
assert_eq!(1, finder.find_iter(&bin).count());
74+
});
75+
}),
76+
);
77+
78+
let bin = corpus.clone();
79+
define(
80+
c,
81+
&format!("memmem/krate/bytefreq/custom"),
82+
&corpus,
83+
Box::new(move |b| {
84+
let finder = memchr::memmem::FinderBuilder::new()
85+
.build_forward_with_ranker(Binary, NEEDLE);
86+
b.iter(|| {
87+
assert_eq!(1, finder.find_iter(&bin).count());
88+
});
89+
}),
90+
);
91+
}
92+
93+
/// A byte-frequency table that is good for scanning binary executables.
94+
struct Binary;
95+
96+
impl HeuristicFrequencyRank for Binary {
97+
fn rank(&self, byte: u8) -> u8 {
98+
const TABLE: [u8; 256] = [
99+
255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17, 89,
100+
58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16, 68, 11, 9,
101+
6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11, 9, 24, 11, 5, 5,
102+
5, 19, 11, 6, 17, 9, 9, 6, 8, 48, 58, 11, 14, 53, 40, 9, 9, 254,
103+
35, 3, 6, 52, 23, 6, 6, 27, 4, 7, 11, 14, 13, 10, 11, 11, 5, 2,
104+
10, 16, 12, 6, 19, 19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19,
105+
8, 18, 20, 24, 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5,
106+
51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15, 0,
107+
0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 12, 2, 1, 1, 3, 1, 1, 1,
108+
6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0, 2, 2, 4, 4, 11, 6, 7, 3, 6, 9,
109+
4, 5, 46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13,
110+
3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2, 16, 3, 5, 1, 6, 1,
111+
1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5, 8, 3, 1, 8, 6, 17, 6,
112+
5, 3, 8, 6, 13, 175,
113+
];
114+
TABLE[byte as usize]
115+
}
116+
}

bench/src/memmem/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ use criterion::Criterion;
9696

9797
use crate::{define, memmem::inputs::INPUTS};
9898

99+
mod byterank;
99100
mod imp;
100101
mod inputs;
101102
mod sliceslice;
@@ -106,6 +107,7 @@ pub fn all(c: &mut Criterion) {
106107
oneshot_iter(c);
107108
prebuilt_iter(c);
108109
sliceslice::all(c);
110+
byterank::all(c);
109111
}
110112

111113
fn oneshot(c: &mut Criterion) {

src/memmem/byte_frequencies.rs renamed to src/memmem/byterank/default.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
pub const BYTE_FREQUENCIES: [u8; 256] = [
1+
pub const RANK: [u8; 256] = [
22
55, // '\x00'
33
52, // '\x01'
44
51, // '\x02'

src/memmem/byterank/mod.rs

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
mod default;
2+
3+
/// This trait allows the user to customize the heuristic used to determine the
4+
/// relative frequency of a given byte in the dataset being searched.
5+
///
6+
/// The use of this trait can have a dramatic impact on performance depending
7+
/// on the type of data being searched. The details of why are explained in the
8+
/// docs of [`prefilter::Prefilter`]. To summarize, the core algorithm uses a
9+
/// prefilter to quickly identify candidate matches that are later verified
10+
/// more slowly. This prefilter is implemented in terms of trying to find
11+
/// `rare` bytes at specific offsets that will occur less frequently in the
12+
/// dataset. While the concept of a `rare` byte is similar for most datasets,
13+
/// there are some specific datasets (like binary executables) that have
14+
/// dramatically different byte distributions. For these datasets customizing
15+
/// the byte frequency heuristic can have a massive impact on performance, and
16+
/// might even need to be done at runtime.
17+
///
18+
/// The default implementation of `HeuristicFrequencyRank` reads from the
19+
/// static frequency table defined in `src/memmem/byterank/default.rs`. This
20+
/// is optimal for most inputs, so if you are unsure of the impact of using a
21+
/// custom `HeuristicFrequencyRank` you should probably just use the default.
22+
///
23+
/// # Example
24+
///
25+
/// ```
26+
/// use memchr::memmem::{FinderBuilder, HeuristicFrequencyRank};
27+
///
28+
/// /// A byte-frequency table that is good for scanning binary executables.
29+
/// struct Binary;
30+
///
31+
/// impl HeuristicFrequencyRank for Binary {
32+
/// fn rank(&self, byte: u8) -> u8 {
33+
/// const TABLE: [u8; 256] = [
34+
/// 255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17,
35+
/// 89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16,
36+
/// 68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11,
37+
/// 9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8,
38+
/// 48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27,
39+
/// 4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19,
40+
/// 19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24,
41+
/// 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5,
42+
/// 51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15,
43+
/// 0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0,
44+
/// 12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0,
45+
/// 2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5,
46+
/// 46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13,
47+
/// 3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2,
48+
/// 16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5,
49+
/// 8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175,
50+
/// ];
51+
/// TABLE[byte as usize]
52+
/// }
53+
/// }
54+
/// // Create a new finder with the custom heuristic.
55+
/// let finder = FinderBuilder::new()
56+
/// .build_forward_with_ranker(Binary, b"\x00\x00\xdd\xdd");
57+
/// // Find needle with custom heuristic.
58+
/// assert!(finder.find(b"\x00\x00\x00\xdd\xdd").is_some());
59+
/// ```
60+
pub trait HeuristicFrequencyRank {
61+
/// Return the heuristic frequency rank of the given byte. A lower rank
62+
/// means the byte is believed to occur less frequently in the haystack.
63+
///
64+
/// Some uses of this heuristic may treat arbitrary absolute rank values as
65+
/// significant. For example, an implementation detail in this crate may
66+
/// determine that heuristic prefilters are inappropriate if every byte in
67+
/// the needle has a "high" rank.
68+
fn rank(&self, byte: u8) -> u8;
69+
}
70+
71+
/// The default byte frequency heuristic that is good for most haystacks.
72+
pub(crate) struct DefaultFrequencyRank;
73+
74+
impl HeuristicFrequencyRank for DefaultFrequencyRank {
75+
fn rank(&self, byte: u8) -> u8 {
76+
self::default::RANK[usize::from(byte)]
77+
}
78+
}
79+
80+
/// This permits passing any implementation of `HeuristicFrequencyRank` as a
81+
/// borrowed version of itself.
82+
impl<'a, R> HeuristicFrequencyRank for &'a R
83+
where
84+
R: HeuristicFrequencyRank,
85+
{
86+
fn rank(&self, byte: u8) -> u8 {
87+
(**self).rank(byte)
88+
}
89+
}

src/memmem/mod.rs

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,12 @@ assert_eq!(None, finder.find(b"quux baz bar"));
6666
```
6767
*/
6868

69-
pub use self::prefilter::Prefilter;
69+
pub use self::{byterank::HeuristicFrequencyRank, prefilter::Prefilter};
7070

7171
use crate::{
7272
cow::CowBytes,
7373
memmem::{
74+
byterank::DefaultFrequencyRank,
7475
prefilter::{Pre, PrefilterFn, PrefilterState},
7576
rabinkarp::NeedleHash,
7677
rarebytes::RareNeedleBytes,
@@ -145,7 +146,7 @@ macro_rules! define_memmem_simple_tests {
145146
};
146147
}
147148

148-
mod byte_frequencies;
149+
mod byterank;
149150
#[cfg(memchr_runtime_simd)]
150151
mod genericsimd;
151152
mod prefilter;
@@ -712,7 +713,24 @@ impl FinderBuilder {
712713
&self,
713714
needle: &'n B,
714715
) -> Finder<'n> {
715-
Finder { searcher: Searcher::new(self.config, needle.as_ref()) }
716+
self.build_forward_with_ranker(DefaultFrequencyRank, needle)
717+
}
718+
719+
/// Build a forward finder using the given needle and a custom heuristic for
720+
/// determining the frequency of a given byte in the dataset.
721+
/// See [`HeuristicFrequencyRank`] for more details.
722+
pub fn build_forward_with_ranker<
723+
'n,
724+
R: HeuristicFrequencyRank,
725+
B: ?Sized + AsRef<[u8]>,
726+
>(
727+
&self,
728+
ranker: R,
729+
needle: &'n B,
730+
) -> Finder<'n> {
731+
Finder {
732+
searcher: Searcher::new(self.config, ranker, needle.as_ref()),
733+
}
716734
}
717735

718736
/// Build a reverse finder using the given needle from the current
@@ -817,14 +835,19 @@ enum SearcherKind {
817835
}
818836

819837
impl<'n> Searcher<'n> {
820-
fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> {
838+
fn new<R: HeuristicFrequencyRank>(
839+
config: SearcherConfig,
840+
ranker: R,
841+
needle: &'n [u8],
842+
) -> Searcher<'n> {
821843
use self::SearcherKind::*;
822844

823-
let ninfo = NeedleInfo::new(needle);
845+
let ninfo = NeedleInfo::new(&ranker, needle);
824846
let mk = |kind: SearcherKind| {
825847
let prefn = prefilter::forward(
826848
&config.prefilter,
827849
&ninfo.rarebytes,
850+
ranker,
828851
needle,
829852
);
830853
Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind }
@@ -1010,9 +1033,12 @@ impl<'n> Searcher<'n> {
10101033
}
10111034

10121035
impl NeedleInfo {
1013-
pub(crate) fn new(needle: &[u8]) -> NeedleInfo {
1036+
pub(crate) fn new<R: HeuristicFrequencyRank>(
1037+
ranker: &R,
1038+
needle: &[u8],
1039+
) -> NeedleInfo {
10141040
NeedleInfo {
1015-
rarebytes: RareNeedleBytes::forward(needle),
1041+
rarebytes: RareNeedleBytes::forward(ranker, needle),
10161042
nhash: NeedleHash::forward(needle),
10171043
}
10181044
}

src/memmem/prefilter/fallback.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,10 @@ mod tests {
9393
use super::*;
9494

9595
fn freqy_find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
96-
let ninfo = NeedleInfo::new(needle);
96+
let ninfo = NeedleInfo::new(
97+
&crate::memmem::byterank::DefaultFrequencyRank,
98+
needle,
99+
);
97100
let mut prestate = PrefilterState::new();
98101
find(&mut prestate, &ninfo, haystack, needle)
99102
}

src/memmem/prefilter/mod.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo};
1+
use crate::memmem::{
2+
rarebytes::RareNeedleBytes, HeuristicFrequencyRank, NeedleInfo,
3+
};
24

35
mod fallback;
46
#[cfg(memchr_runtime_simd)]
@@ -287,9 +289,10 @@ impl PrefilterState {
287289
/// is the default). In general, we try to use an AVX prefilter, followed by
288290
/// SSE and then followed by a generic one based on memchr.
289291
#[inline(always)]
290-
pub(crate) fn forward(
292+
pub(crate) fn forward<R: HeuristicFrequencyRank>(
291293
config: &Prefilter,
292294
rare: &RareNeedleBytes,
295+
ranker: R,
293296
needle: &[u8],
294297
) -> Option<PrefilterFn> {
295298
if config.is_none() || needle.len() <= 1 {
@@ -327,7 +330,8 @@ pub(crate) fn forward(
327330
// Check that our rarest byte has a reasonably low rank. The main issue
328331
// here is that the fallback prefilter can perform pretty poorly if it's
329332
// given common bytes. So we try to avoid the worst cases here.
330-
let (rare1_rank, _) = rare.as_ranks(needle);
333+
let (rare1, _) = rare.as_rare_bytes(needle);
334+
let rare1_rank = usize::from(ranker.rank(rare1));
331335
if rare1_rank <= MAX_FALLBACK_RANK {
332336
// SAFETY: fallback::find is safe to call in all environments.
333337
return unsafe { Some(PrefilterFn::new(fallback::find)) };

0 commit comments

Comments
 (0)