From 45b60feb4927c01ac88e4365f774a5601d014718 Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Thu, 13 Apr 2023 18:22:48 -0300 Subject: [PATCH] Improve granularity of benchmarks (#122) To provide better baselines for perf work, etc... It unfortunately requires exposing a few functions as public, though they are hidden from the docs and re-exported under a `_benchable` mod. They have warnings. Don't use them. We will change them. You will be sad. You've been warned. --- benches/parse.rs | 107 ++++++++++++++++++++++++++++++++++++++++++++++- src/iter.rs | 2 + src/lib.rs | 62 ++++++++++++++++++--------- 3 files changed, 150 insertions(+), 21 deletions(-) diff --git a/benches/parse.rs b/benches/parse.rs index cc6d470..98952e3 100644 --- a/benches/parse.rs +++ b/benches/parse.rs @@ -83,9 +83,112 @@ fn resp_short(c: &mut Criterion) { })); } +fn uri(c: &mut Criterion) { + fn _uri(c: &mut Criterion, name: &str, input: &'static [u8]) { + c.benchmark_group("uri") + .throughput(Throughput::Bytes(input.len() as u64)) + .bench_function(name, |b| b.iter(|| { + black_box({ + let mut b = httparse::_benchable::Bytes::new(input); + httparse::_benchable::parse_uri(&mut b).unwrap() + }); + })); + } + + const S: &[u8] = b" "; + const CHUNK64: &[u8] = b"/wp-content/uploads/2022/08/31/hello-kitty-darth-vader-pink.webp"; + let chunk_4k = CHUNK64.repeat(64); + + // 1b to 4096b + for p in 0..=12 { + let n = 1 << p; + _uri(c, &format!("uri_{}b", n), [chunk_4k[..n].to_vec(), S.into()].concat().leak()); + } +} + +fn header(c: &mut Criterion) { + fn _header(c: &mut Criterion, name: &str, input: &'static [u8]) { + let mut headers = [httparse::EMPTY_HEADER; 128]; + c.benchmark_group("header") + .throughput(Throughput::Bytes(input.len() as u64)) + .bench_function(name, |b| b.iter(|| { + black_box({ + let _ = httparse::parse_headers(input, &mut headers).unwrap(); + }); + })); + } + + const RN: &[u8] = b"\r\n"; + const RNRN: &[u8] = b"\r\n\r\n"; + const TINY_RN: &[u8] = b"a: b\r\n"; // minimal header line + const XFOOBAR: &[u8] = b"X-Foobar"; + let xfoobar_4k = XFOOBAR.repeat(4096/XFOOBAR.len()); + + // header names 1b to 4096b + for p in 0..=12 { + let n = 1 << p; + let payload = [&xfoobar_4k[..n], b": b", RNRN].concat().leak(); + _header(c, &format!("name_{}b", n), payload); + } + + // header values 1b to 4096b + for p in 0..=12 { + let n = 1 << p; + let payload = [b"a: ", &xfoobar_4k[..n], RNRN].concat().leak(); + _header(c, &format!("value_{}b", n), payload); + } + + // 1 to 128 + for p in 0..=7 { + let n = 1 << p; + _header(c, &format!("count_{}", n), [TINY_RN.repeat(n), RN.into()].concat().leak()); + } +} + +fn version(c: &mut Criterion) { + fn _version(c: &mut Criterion, name: &str, input: &'static [u8]) { + c.benchmark_group("version") + .throughput(Throughput::Bytes(input.len() as u64)) + .bench_function(name, |b| b.iter(|| { + black_box({ + let mut b = httparse::_benchable::Bytes::new(input); + httparse::_benchable::parse_version(&mut b).unwrap() + }); + })); + } + + _version(c, "http10", b"HTTP/1.0\r\n"); + _version(c, "http11", b"HTTP/1.1\r\n"); + _version(c, "partial", b"HTTP/1."); +} + +fn method(c: &mut Criterion) { + fn _method(c: &mut Criterion, name: &str, input: &'static [u8]) { + c.benchmark_group("method") + .throughput(Throughput::Bytes(input.len() as u64)) + .bench_function(name, |b| b.iter(|| { + black_box({ + let mut b = httparse::_benchable::Bytes::new(input); + httparse::_benchable::parse_method(&mut b).unwrap() + }); + })); + } + + // Common methods should be fast-pathed + const COMMON_METHODS: &[&str] = &["GET", "HEAD", "POST", "PUT", "DELETE", "CONNECT", "OPTIONS", "TRACE", "PATCH"]; + for method in COMMON_METHODS { + _method(c, &method.to_lowercase(), format!("{} / HTTP/1.1\r\n", method).into_bytes().leak()); + } + // Custom methods should be infrequent and thus not worth optimizing + _method(c, "custom", b"CUSTOM / HTTP/1.1\r\n"); +} + +const WARMUP: Duration = Duration::from_millis(100); +const MTIME: Duration = Duration::from_millis(100); +const SAMPLES: usize = 200; criterion_group!{ name = benches; - config = Criterion::default().sample_size(100).measurement_time(Duration::from_secs(10)); - targets = req, req_short, resp, resp_short + config = Criterion::default().sample_size(SAMPLES).warm_up_time(WARMUP).measurement_time(MTIME); + targets = req, req_short, resp, resp_short, uri, header, version, method } criterion_main!(benches); diff --git a/src/iter.rs b/src/iter.rs index c004f41..0d86f9e 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -2,11 +2,13 @@ use core::slice; use core::convert::TryInto; use core::convert::TryFrom; +#[allow(missing_docs)] pub struct Bytes<'a> { slice: &'a [u8], pos: usize } +#[allow(missing_docs)] impl<'a> Bytes<'a> { #[inline] pub fn new(slice: &'a [u8]) -> Bytes<'a> { diff --git a/src/lib.rs b/src/lib.rs index 7153de5..2f4e57b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,16 @@ mod iter; #[macro_use] mod macros; mod simd; +#[doc(hidden)] +// Expose some internal functions so we can bench them individually +// WARNING: Exported for internal benchmarks, not fit for public consumption +pub mod _benchable { + pub use super::parse_uri; + pub use super::parse_version; + pub use super::parse_method; + pub use super::iter::Bytes; +} + /// Determines if byte is a token char. /// /// > ```notrust @@ -476,23 +486,7 @@ impl<'h, 'b> Request<'h, 'b> { let orig_len = buf.len(); let mut bytes = Bytes::new(buf); complete!(skip_empty_lines(&mut bytes)); - const GET: [u8; 4] = *b"GET "; - const POST: [u8; 4] = *b"POST"; - let method = match bytes.peek_n::<[u8; 4]>(4) { - Some(GET) => { - unsafe { - bytes.advance_and_commit(4); - } - "GET" - } - Some(POST) if bytes.peek_ahead(4) == Some(b' ') => { - unsafe { - bytes.advance_and_commit(5); - } - "POST" - } - _ => complete!(parse_token(&mut bytes)), - }; + let method = complete!(parse_method(&mut bytes)); self.method = Some(method); if config.allow_multiple_spaces_in_request_line_delimiters { complete!(skip_spaces(&mut bytes)); @@ -743,7 +737,10 @@ impl<'a> fmt::Debug for Header<'a> { pub const EMPTY_HEADER: Header<'static> = Header { name: "", value: b"" }; #[inline] -fn parse_version(bytes: &mut Bytes<'_>) -> Result { +#[doc(hidden)] +#[allow(missing_docs)] +// WARNING: Exported for internal benchmarks, not fit for public consumption +pub fn parse_version(bytes: &mut Bytes) -> Result { if let Some(eight) = bytes.peek_n::<[u8; 8]>(8) { unsafe { bytes.advance(8); } return match &eight { @@ -767,6 +764,30 @@ fn parse_version(bytes: &mut Bytes<'_>) -> Result { Ok(Status::Partial) } +#[inline] +#[doc(hidden)] +#[allow(missing_docs)] +// WARNING: Exported for internal benchmarks, not fit for public consumption +pub fn parse_method<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> { + const GET: [u8; 4] = *b"GET "; + const POST: [u8; 4] = *b"POST"; + match bytes.peek_n::<[u8; 4]>(4) { + Some(GET) => { + unsafe { + bytes.advance_and_commit(4); + } + Ok(Status::Complete("GET")) + } + Some(POST) if bytes.peek_ahead(4) == Some(b' ') => { + unsafe { + bytes.advance_and_commit(5); + } + Ok(Status::Complete("POST")) + } + _ => parse_token(bytes), + } +} + /// From [RFC 7230](https://tools.ietf.org/html/rfc7230): /// /// > ```notrust @@ -838,7 +859,10 @@ fn parse_token<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> { } #[inline] -fn parse_uri<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> { +#[doc(hidden)] +#[allow(missing_docs)] +// WARNING: Exported for internal benchmarks, not fit for public consumption +pub fn parse_uri<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> { let b = next!(bytes); if !is_uri_token(b) { // First char must be a URI char, it can't be a space which would indicate an empty path.