Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions bench/src/memmem/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ pub fn all(c: &mut Criterion) {
oneshot_iter(c);
prebuilt_iter(c);
sliceslice::all(c);
misc(c);
}

fn oneshot(c: &mut Criterion) {
Expand Down Expand Up @@ -381,3 +382,114 @@ fn prebuilt_iter(c: &mut Criterion) {
}
}
}

use memchr::memmem::HeuristicFrequencyRank;

/// Registers the miscellaneous `memmem` benchmarks: the `Finder`
/// construction benchmarks followed by the byte frequency table benchmarks.
fn misc(c: &mut Criterion) {
finder_construction(c);
byte_frequencies(c);
}

/// Benchmarks purely the time taken to create a `Finder`.
///
/// It exists to catch regressions when new features are added to the
/// `Finder`, such as the ability to construct one with a custom
/// `HeuristicFrequencyRank`. Each needle is measured twice: once through
/// `build_forward` (default heuristic) and once through `build_heuristic`
/// with the `Hfrx86` table.
fn finder_construction(c: &mut Criterion) {
    const NEEDLES: [&str; 3] = ["a", "abcd", "abcdefgh12345678"];

    for needle in NEEDLES {
        // The needles are `'static`, so the byte slice can be copied into
        // each boxed benchmark closure.
        let bytes = needle.as_bytes();

        let default_name = format!(
            "memmem/krate/misc/construct-finder/default(len={})",
            needle.len()
        );
        define(
            c,
            &default_name,
            bytes,
            Box::new(move |b| {
                b.iter(|| {
                    memchr::memmem::FinderBuilder::new().build_forward(bytes)
                });
            }),
        );

        let custom_name = format!(
            "memmem/krate/misc/construct-finder/custom(len={})",
            needle.len()
        );
        define(
            c,
            &custom_name,
            bytes,
            Box::new(move |b| {
                b.iter(|| {
                    memchr::memmem::FinderBuilder::new()
                        .build_heuristic(bytes, Hfrx86)
                });
            }),
        );
    }
}

/// Benchmarks a search over a binary executable with the default byte
/// frequency table versus a custom one.
///
/// This benchmark exists to demonstrate a common use case for customizing
/// the byte frequency table used by a `Finder`, and the relative performance
/// gain from using a table suited to the data. This is essentially why
/// `HeuristicFrequencyRank` was added.
///
/// The corpus is the benchmark binary itself. Both benchmarks assert that
/// the needle is found exactly once in it.
fn byte_frequencies(c: &mut Criterion) {
    // Bytes we want to scan for that are rare in strings but common in
    // executables.
    // NOTE(review): the trailing `'` is part of the needle as written;
    // confirm it is intentional before changing it, since the assertions
    // below depend on the exact number of occurrences in the binary.
    const NEEDLE: &[u8] = b"\x00\x00\xdd\xdd'";

    // The input for the benchmark is the benchmark binary itself. Ask the OS
    // for its path rather than trusting `argv[0]`, which is not guaranteed
    // to be a usable path.
    let exe = std::env::current_exe()
        .expect("failed to determine the path of the benchmark executable");
    let corpus = std::fs::read(&exe)
        .expect("failed to read the benchmark executable");

    // Each boxed closure must own its haystack, while `define` still borrows
    // `corpus` for the same call, hence one clone per benchmark.
    let bin = corpus.clone();
    define(
        c,
        "memmem/krate/misc/frequency-table/default",
        &corpus,
        Box::new(move |b| {
            let finder =
                memchr::memmem::FinderBuilder::new().build_forward(NEEDLE);
            b.iter(|| {
                assert_eq!(1, finder.find_iter(&bin).count());
            });
        }),
    );

    let bin = corpus.clone();
    define(
        c,
        "memmem/krate/misc/frequency-table/custom",
        &corpus,
        Box::new(move |b| {
            let finder = memchr::memmem::FinderBuilder::new()
                .build_heuristic(NEEDLE, Hfrx86);
            b.iter(|| {
                assert_eq!(1, finder.find_iter(&bin).count());
            });
        }),
    );
}

/// A byte-frequency table that is good for scanning binary executables.
struct Hfrx86;

impl HeuristicFrequencyRank for Hfrx86 {
    /// Looks up the rank of `byte` in a fixed 256-entry table indexed by the
    /// byte's value.
    fn rank(&self, byte: u8) -> u8 {
        // One row per high nibble: row `0xN0` holds the ranks of bytes
        // `0xN0..=0xNF`.
        #[rustfmt::skip]
        const RANKS: [u8; 256] = [
            /* 0x00 */ 255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17, 89,
            /* 0x10 */ 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16,
            /* 0x20 */ 68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4,
            /* 0x30 */ 30, 11, 9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8,
            /* 0x40 */ 48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6,
            /* 0x50 */ 27, 4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19,
            /* 0x60 */ 19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20,
            /* 0x70 */ 24, 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5,
            /* 0x80 */ 51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2,
            /* 0x90 */ 15, 0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0,
            /* 0xA0 */ 12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2,
            /* 0xB0 */ 9, 1, 1, 0, 2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5,
            /* 0xC0 */ 46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7,
            /* 0xD0 */ 13, 3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2,
            /* 0xE0 */ 16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3,
            /* 0xF0 */ 16, 3, 5, 8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175,
        ];
        RANKS[usize::from(byte)]
    }
}
130 changes: 127 additions & 3 deletions src/memmem/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,75 @@ impl<'n> FinderRev<'n> {
}
}

/// A heuristic that ranks bytes by how frequently they are expected to occur
/// in the data being searched.
///
/// This trait allows the user to customize the heuristic used to determine
/// the relative frequency of a given byte in the dataset being searched.
///
/// The use of this trait can have a dramatic impact on performance depending
/// on the type of data being searched. The details of why are explained in
/// the docs of [`prefilter::Prefilter`]. To summarize, the core algorithm
/// uses a prefilter to quickly identify candidate matches that are later
/// verified more slowly. This prefilter is implemented in terms of trying to
/// find `rare` bytes at specific offsets that will occur less frequently in
/// the dataset. While the concept of a `rare` byte is similar for most
/// datasets, there are some specific datasets (like binary executables) that
/// have dramatically different byte distributions. For these datasets,
/// customizing the byte frequency heuristic can have a massive impact on
/// performance, and might even need to be done at runtime.
///
/// The default implementation of `HeuristicFrequencyRank` reads from the
/// static frequency table defined in `src/memmem/byte_frequencies.rs`. This
/// is optimal for most inputs, so if you are unsure of the impact of using a
/// custom `HeuristicFrequencyRank`, you should probably just use the default.
///
/// # Example
///
/// ```
/// use memchr::memmem::{FinderBuilder, HeuristicFrequencyRank};
///
/// // A byte-frequency table that is good for scanning binary executables
/// struct X86;
/// impl HeuristicFrequencyRank for X86 {
///     fn rank(&self, byte: u8) -> u8 {
///         const TABLE: [u8; 256] = [
///             255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17, 89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16,
///             68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11, 9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8,
///             48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27, 4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19,
///             19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24, 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5,
///             51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15, 0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0,
///             12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0, 2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5,
///             46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13, 3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2,
///             16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5, 8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175,
///         ];
///         TABLE[byte as usize]
///     }
/// }
/// // Create a new finder with the custom heuristic
/// let finder = FinderBuilder::new().build_heuristic(b"\x00\x00\xdd\xdd", X86);
/// // Find needle with custom heuristic
/// assert!(finder.find(b"\x00\x00\x00\xdd\xdd").is_some());
/// ```
pub trait HeuristicFrequencyRank {
/// Return the heuristic frequency rank of the given byte. A lower rank
/// means the byte is believed to occur less frequently.
fn rank(&self, byte: u8) -> u8;
}

/// The default byte frequency heuristic, which is good for most inputs.
///
/// Ranks come straight from the static `byte_frequencies::BYTE_FREQUENCIES`
/// table, indexed by the byte's value.
pub(crate) struct DefaultHFR;

impl HeuristicFrequencyRank for DefaultHFR {
    fn rank(&self, byte: u8) -> u8 {
        let index = usize::from(byte);
        byte_frequencies::BYTE_FREQUENCIES[index]
    }
}

// Forwarding impl: a shared reference to a heuristic is itself a heuristic,
// so `H` may be handed to related functions either by reference or by value.
impl<'a, H: HeuristicFrequencyRank> HeuristicFrequencyRank for &'a H {
    fn rank(&self, byte: u8) -> u8 {
        // `self` is `&&H`; peel both layers and delegate to `H`'s own impl.
        (**self).rank(byte)
    }
}

/// A builder for constructing non-default forward or reverse memmem finders.
///
/// A builder is primarily useful for configuring a substring searcher.
Expand All @@ -715,6 +784,27 @@ impl FinderBuilder {
Finder { searcher: Searcher::new(self.config, needle.as_ref()) }
}

/// Build a forward finder for the given needle, using a caller-supplied
/// heuristic to decide how frequent each byte is expected to be in the
/// dataset being searched.
///
/// See [`HeuristicFrequencyRank`] for more details.
pub fn build_heuristic<'n, B, H>(
    &self,
    needle: &'n B,
    heuristic: H,
) -> Finder<'n>
where
    B: ?Sized + AsRef<[u8]>,
    H: HeuristicFrequencyRank,
{
    let searcher =
        Searcher::new_heuristic(self.config, needle.as_ref(), heuristic);
    Finder { searcher }
}

/// Build a reverse finder using the given needle from the current
/// settings.
pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>(
Expand Down Expand Up @@ -817,15 +907,46 @@ enum SearcherKind {
}

impl<'n> Searcher<'n> {
/// Build a searcher for `needle` using the default byte frequency heuristic
/// (`DefaultHFR`).
///
/// NOTE: This method is important and must not be removed!
/// The reason is kind of obscure, so here is the rationale as recorded by
/// the author.
///
/// When rust generates assembly for a regular function call, it generates a
/// `direct call`. In assembly, this looks something like
/// `call memchr::memmem::Searcher::new`. The function address is embedded
/// into the instruction, so it is fast.
///
/// When rust generates assembly for a generic function call, it generates an
/// `indirect call`. In assembly, this looks something like
/// `call qword ptr [rip + memchr::memmem::Searcher::new@GOTPCREL]`. The
/// function address must be calculated dynamically at runtime, so it is
/// slower.
///
/// Since `FinderBuilder::build_heuristic` is a public method and it uses
/// `H: HeuristicFrequencyRank`, the compiler cannot make assumptions about
/// the value of `H`, so it will always generate an inefficient indirect call
/// for `Searcher::new`. This is true regardless of how you embed the type
/// signature, as long as a public method accepts a generic type that is then
/// forwarded to other generic functions.
///
/// To prevent an indirect call being generated for the default case where we
/// want to use the default heuristic, we need a non-generic function that
/// will call the generic version with a hard-coded value for `H`. This
/// allows the compiler to generate a regular function that can be called
/// directly.
///
/// When constructing a `Finder` with a custom `HeuristicFrequencyRank`,
/// the use of an indirect call is unavoidable.
fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> {
Self::new_heuristic(config, needle, DefaultHFR)
}

fn new_heuristic<H: HeuristicFrequencyRank>(
config: SearcherConfig,
needle: &'n [u8],
heuristic: H,
) -> Searcher<'n> {
use self::SearcherKind::*;

let ninfo = NeedleInfo::new(needle);
let ninfo = NeedleInfo::new(needle, &heuristic);
let mk = |kind: SearcherKind| {
let prefn = prefilter::forward(
&config.prefilter,
&ninfo.rarebytes,
needle,
heuristic,
);
Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind }
};
Expand Down Expand Up @@ -1010,9 +1131,12 @@ impl<'n> Searcher<'n> {
}

impl NeedleInfo {
pub(crate) fn new(needle: &[u8]) -> NeedleInfo {
pub(crate) fn new<H: HeuristicFrequencyRank>(
needle: &[u8],
heuristic: &H,
) -> NeedleInfo {
NeedleInfo {
rarebytes: RareNeedleBytes::forward(needle),
rarebytes: RareNeedleBytes::forward(needle, heuristic),
nhash: NeedleHash::forward(needle),
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/memmem/prefilter/fallback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ mod tests {
use super::*;

/// Test helper: runs the fallback prefilter's `find` for `needle` over
/// `haystack`, using a fresh `PrefilterState` and needle info built with the
/// crate's default byte frequency heuristic.
fn freqy_find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let ninfo = NeedleInfo::new(needle, &crate::memmem::DefaultHFR);
    let mut prestate = PrefilterState::new();
    find(&mut prestate, &ninfo, haystack, needle)
}
Expand Down
10 changes: 7 additions & 3 deletions src/memmem/prefilter/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo};
use crate::memmem::{
rarebytes::RareNeedleBytes, HeuristicFrequencyRank, NeedleInfo,
};

mod fallback;
#[cfg(memchr_runtime_simd)]
Expand Down Expand Up @@ -287,10 +289,11 @@ impl PrefilterState {
/// is the default). In general, we try to use an AVX prefilter, followed by
/// SSE and then followed by a generic one based on memchr.
#[inline(always)]
pub(crate) fn forward(
pub(crate) fn forward<H: HeuristicFrequencyRank>(
config: &Prefilter,
rare: &RareNeedleBytes,
needle: &[u8],
heuristic: H,
) -> Option<PrefilterFn> {
if config.is_none() || needle.len() <= 1 {
return None;
Expand Down Expand Up @@ -327,7 +330,8 @@ pub(crate) fn forward(
// Check that our rarest byte has a reasonably low rank. The main issue
// here is that the fallback prefilter can perform pretty poorly if it's
// given common bytes. So we try to avoid the worst cases here.
let (rare1_rank, _) = rare.as_ranks(needle);
let (rare1, _) = rare.as_rare_bytes(needle);
let rare1_rank = heuristic.rank(rare1) as usize;
if rare1_rank <= MAX_FALLBACK_RANK {
// SAFETY: fallback::find is safe to call in all environments.
return unsafe { Some(PrefilterFn::new(fallback::find)) };
Expand Down
27 changes: 9 additions & 18 deletions src/memmem/rarebytes.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use super::HeuristicFrequencyRank;

/// A heuristic frequency based detection of rare bytes for substring search.
///
/// This detector attempts to pick out two bytes in a needle that are predicted
Expand Down Expand Up @@ -44,7 +46,10 @@ impl RareNeedleBytes {

/// Detect the leftmost offsets of the two rarest bytes in the given
/// needle.
pub(crate) fn forward(needle: &[u8]) -> RareNeedleBytes {
pub(crate) fn forward<H: HeuristicFrequencyRank>(
needle: &[u8],
h: &H,
) -> RareNeedleBytes {
if needle.len() <= 1 || needle.len() > core::u8::MAX as usize {
// For needles bigger than u8::MAX, our offsets aren't big enough.
// (We make our offsets small to reduce stack copying.)
Expand All @@ -62,17 +67,17 @@ impl RareNeedleBytes {
// Find the rarest two bytes. We make them distinct by construction.
let (mut rare1, mut rare1i) = (needle[0], 0);
let (mut rare2, mut rare2i) = (needle[1], 1);
if rank(rare2) < rank(rare1) {
if h.rank(rare2) < h.rank(rare1) {
core::mem::swap(&mut rare1, &mut rare2);
core::mem::swap(&mut rare1i, &mut rare2i);
}
for (i, &b) in needle.iter().enumerate().skip(2) {
if rank(b) < rank(rare1) {
if h.rank(b) < h.rank(rare1) {
rare2 = rare1;
rare2i = rare1i;
rare1 = b;
rare1i = i as u8;
} else if b != rare1 && rank(b) < rank(rare2) {
} else if b != rare1 && h.rank(b) < h.rank(rare2) {
rare2 = b;
rare2i = i as u8;
}
Expand Down Expand Up @@ -119,18 +124,4 @@ impl RareNeedleBytes {
pub(crate) fn as_rare_usize(&self) -> (usize, usize) {
(self.rare1i as usize, self.rare2i as usize)
}

/// Return the byte frequency rank of each of the two rare bytes, in the
/// same order as `as_rare_bytes` returns them. The higher the rank, the
/// more frequent the byte is predicted to be. The needle given must be
/// the same one given to the RareNeedleBytes constructor.
pub(crate) fn as_ranks(&self, needle: &[u8]) -> (usize, usize) {
let (b1, b2) = self.as_rare_bytes(needle);
(rank(b1), rank(b2))
}
}

/// Look up the heuristic frequency rank of byte `b` in the crate's static
/// byte frequency table. A lower rank means the byte is believed to occur
/// less frequently.
fn rank(b: u8) -> usize {
    use crate::memmem::byte_frequencies::BYTE_FREQUENCIES;

    usize::from(BYTE_FREQUENCIES[usize::from(b)])
}