Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
96a62fd
Update test script
haga-rak Apr 9, 2026
8b2ed9d
Improve benchmark condition
haga-rak Apr 9, 2026
a93758e
Use ThreadPool for pipe
haga-rak Apr 9, 2026
ea6c499
Remove _headerEncodeLock by moving HPACK encode to WriteLoop
haga-rak Apr 10, 2026
c8c0f18
Update benchmark run settings
haga-rak Apr 10, 2026
5b04b9b
Remove useless await
haga-rak Apr 10, 2026
afd0bdc
Optimize http11 parser hot path
haga-rak Apr 10, 2026
34c6638
Update test plan
haga-rak Apr 10, 2026
acc2cc1
Remove threading diag
haga-rak Apr 11, 2026
a6f4444
Add contention analyzer
haga-rak Apr 11, 2026
a34c6c6
Defer flush outside hot loop
haga-rak Apr 11, 2026
b180954
Fix missing fluxzy on post payload
haga-rak Apr 11, 2026
90c2951
Merge remote-tracking branch 'origin/main' into dev/tweak-perf
haga-rak Apr 11, 2026
920c18e
Add context aware frame reader
haga-rak Apr 11, 2026
1c0d891
Update iteration for short benchmark
haga-rak Apr 12, 2026
a31a40c
Add late flush for write pool
haga-rak Apr 12, 2026
5333f7a
Optimize HPACK decode path: eliminate redundant Huffman traversals an…
haga-rak Apr 12, 2026
4a2026a
Update chanel settings for exchange queue
haga-rak Apr 12, 2026
786d9a9
Better hashcode
haga-rak Apr 12, 2026
8ce78a4
Optimize GetPool hot path: lock-free pool reuse, init-before-store, C…
haga-rak Apr 12, 2026
6b737fc
ServerStreamWorker return ValueTask instead of Task as it will be non…
haga-rak Apr 12, 2026
9318665
Optimize EnforceRules hot path: pre-partition rules by FilterScope
haga-rak Apr 12, 2026
eb7f0cb
Optimize Header.WriteHttp11 hot path
haga-rak Apr 12, 2026
a218702
Statik buffer for benchmark server
haga-rak Apr 12, 2026
8848527
Add allocation profiling: --alloc flag and TraceAllocationAnalyzer
haga-rak Apr 12, 2026
23df339
Pool H2 header accumulation buffer in StreamWorker and ServerStreamWo…
haga-rak Apr 12, 2026
044c6ac
Replace VariableBuildingContext dictionary with allocation-free TryEv…
haga-rak Apr 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 46 additions & 1 deletion benchmark-throughput.cmd
Original file line number Diff line number Diff line change
@@ -1,8 +1,53 @@
@echo off
setlocal

rem FILTER is the BenchmarkDotNet filter pattern, SHORT_ARGS holds extra job sizing arguments.
rem Both start empty so that values inherited from the calling environment are ignored.
set FILTER=
set SHORT_ARGS=

rem Argument loop: each pass looks at the first argument, handles it, then shifts and jumps back.
rem Parsing stops at the first empty argument.
:parse_args
if "%~1"=="" goto done_args
rem Shortened run. This assignment is unconditional, so it replaces any SHORT_ARGS set earlier.
rem NOTE review: benchmark-throughput.sh uses iterationCount 10 for the same option, confirm which value is intended.
if /i "%~1"=="--short" (
set SHORT_ARGS=--warmupCount 1 --iterationCount 20 --launchCount 1
shift
goto parse_args
)
if /i "%~1"=="--contention" (
rem Opt-in CLR contention ETW trace (EventPipe). Produces .nettrace per run
rem in BenchmarkDotNet.Artifacts/. Open in PerfView / VS / speedscope.
set FLUXZY_BENCH_CONTENTION=1
shift
goto parse_args
)
if /i "%~1"=="--alloc" (
rem Opt-in CLR allocation ETW trace (EventPipe). Produces .nettrace per run
rem with sampled GC/AllocationTick events + managed stacks. Defaults to
rem shorter iterations since the trace overhead skews absolute numbers.
rem Open in PerfView ("GC Heap Alloc Ignore Free (Coarse Sampling) Stacks").
set FLUXZY_BENCH_ALLOC=1
rem The shorter defaults apply only when no earlier option already set SHORT_ARGS.
if "%SHORT_ARGS%"=="" set SHORT_ARGS=--warmupCount 1 --iterationCount 5 --launchCount 1
shift
goto parse_args
)
rem The two presets below set both the job sizing and the filter, replacing earlier values.
rem NOTE review: benchmark-throughput.sh uses iterationCount 5 for these presets, confirm which value is intended.
if /i "%~1"=="--h2-8k" (
rem H2 + 8192 body only, ~30%% of default duration
set SHORT_ARGS=--warmupCount 2 --iterationCount 10 --launchCount 1
set FILTER=*ProxyThroughputBenchmark*True*8192*
shift
goto parse_args
)
if /i "%~1"=="--h2-0k" (
rem H2 + 0 body only, ~30%% of default duration
set SHORT_ARGS=--warmupCount 2 --iterationCount 10 --launchCount 1
set FILTER=*ProxyThroughputBenchmark*True*0*
shift
goto parse_args
)
rem Any argument that is not a known option becomes the filter. The last one wins.
set FILTER=%~1
shift
goto parse_args
:done_args

rem Fall back to the whole throughput benchmark class when no filter was selected.
if "%FILTER%"=="" set FILTER=*ProxyThroughputBenchmark*

dotnet build fluxzy.core.slnx -c Release -v q --nologo
dotnet run --project test/Fluxzy.Benchmarks -c Release --no-build -- --filter "%FILTER%"
dotnet run --project test/Fluxzy.Benchmarks -c Release --no-build -- --filter "%FILTER%" %SHORT_ARGS%
50 changes: 48 additions & 2 deletions benchmark-throughput.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,53 @@
#!/usr/bin/env bash
set -euo pipefail

FILTER="${1:-*ProxyThroughputBenchmark*}"
# FILTER is the BenchmarkDotNet filter pattern; SHORT_ARGS carries optional job-sizing
# arguments. Both start empty and are filled in by the option loop below.
FILTER=""
SHORT_ARGS=""

# Walk the command line in order. Known flags adjust the run; any other argument is
# taken as the filter (the last one wins).
for arg in "$@"; do
    case "$arg" in
        --short)
            # Shortened run; unconditionally replaces any earlier SHORT_ARGS.
            # NOTE(review): benchmark-throughput.cmd uses --iterationCount 20 here — confirm which is intended.
            SHORT_ARGS="--warmupCount 1 --iterationCount 10 --launchCount 1"
            ;;
        --contention)
            # Opt-in CLR contention ETW trace (EventPipe). Produces one .nettrace per run
            # in BenchmarkDotNet.Artifacts/. Open in PerfView / VS / speedscope.
            export FLUXZY_BENCH_CONTENTION=1
            ;;
        --alloc)
            # Opt-in CLR allocation ETW trace (EventPipe). Produces one .nettrace per run
            # with sampled GC/AllocationTick events + managed stacks.
            # Open in PerfView ("GC Heap Alloc Ignore Free (Coarse Sampling) Stacks").
            export FLUXZY_BENCH_ALLOC=1
            # Trace overhead skews absolute numbers, so default to shorter iterations
            # unless an earlier option already chose the job sizing.
            [[ -n "$SHORT_ARGS" ]] || SHORT_ARGS="--warmupCount 1 --iterationCount 5 --launchCount 1"
            ;;
        --h2-8k)
            # H2 + 8192 body only, ~30% of default duration
            # NOTE(review): benchmark-throughput.cmd uses --iterationCount 10 for this preset — confirm.
            FILTER="*ProxyThroughputBenchmark*True*8192*"
            SHORT_ARGS="--warmupCount 2 --iterationCount 5 --launchCount 1"
            ;;
        --h2-0k)
            # H2 + 0 body only, ~30% of default duration
            # NOTE(review): benchmark-throughput.cmd uses --iterationCount 10 for this preset — confirm.
            FILTER="*ProxyThroughputBenchmark*True*0*"
            SHORT_ARGS="--warmupCount 2 --iterationCount 5 --launchCount 1"
            ;;
        *)
            FILTER="$arg"
            ;;
    esac
done

# No filter given and no preset selected one: run the whole throughput benchmark class.
if [[ -z "$FILTER" ]]; then
    FILTER="*ProxyThroughputBenchmark*"
fi

dotnet build fluxzy.core.slnx -c Release -v q --nologo
dotnet run --project test/Fluxzy.Benchmarks -c Release --no-build -- --filter "$FILTER"
dotnet run --project test/Fluxzy.Benchmarks -c Release --no-build -- --filter "$FILTER" $SHORT_ARGS
2 changes: 2 additions & 0 deletions fluxzy.core.slnx
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@
<File Path=".github/workflows/release-nuget.pre.public.yml" />
<File Path=".github/workflows/release-nuget.public.yml" />
<File Path=".gitignore" />
<File Path="benchmark-throughput.cmd" />
<File Path="benchmark-throughput.sh" />
<File Path="codecov.yml" />
<File Path="Directory.Build.props" />
<File Path="global.json" />
Expand Down
52 changes: 36 additions & 16 deletions src/Fluxzy.Core/Clients/H2/Encoder/HPack/PrimitiveOperation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

using System;
using System.Buffers;
using System.Text;
using Fluxzy.Clients.H2.Encoder.Huffman;

namespace Fluxzy.Clients.H2.Encoder.HPack
Expand Down Expand Up @@ -120,6 +119,23 @@ public int GetStringLength(ReadOnlySpan<byte> input)
}
}

/// <summary>
/// Reads the wire prefix of an HPACK string literal (Huffman flag and wire byte length)
/// without performing any Huffman decoding of the payload.
/// </summary>
/// <param name="input">Bytes starting at the first byte of the string literal.</param>
/// <param name="wireLength">Receives the payload length, in bytes, as announced by the prefix.</param>
/// <param name="isHuffman">
/// Receives <c>true</c> when the most significant bit of the first byte is set,
/// i.e. the payload is Huffman encoded.
/// </param>
/// <returns>The number of prefix bytes consumed from <paramref name="input" />.</returns>
/// <exception cref="HPackCodecException">
/// <paramref name="input" /> is empty, or the announced length exceeds the configured maximum string length.
/// </exception>
public int ReadStringPrefix(ReadOnlySpan<byte> input, out int wireLength, out bool isHuffman)
{
    // The header block comes from the peer: a truncated block must surface as a codec
    // error rather than as an IndexOutOfRangeException from the input[0] access below.
    if (input.IsEmpty) {
        throw new HPackCodecException(
            "unexpected end of header block while reading a string prefix");
    }

    // Bit 7 of the first byte is the Huffman flag; the remaining 7 bits start the length integer.
    isHuffman = (input[0] & 0x80) != 0;
    var prefixBytes = ReadInt32(input, 7, out wireLength);

    if (wireLength > _maxStringLength) {
        throw new HPackCodecException(
            $"string length exceed the maximum authorized : {wireLength} / {_maxStringLength}");
    }

    return prefixBytes;
}

public Span<char> ReadString(ReadOnlySpan<byte> input, Span<char> buffer, out int newOffset)
{
try {
Expand All @@ -132,32 +148,33 @@ public Span<char> ReadString(ReadOnlySpan<byte> input, Span<char> buffer, out in
}

var rawString = input.Slice(offset, stringLength);
newOffset = stringLength + offset;

if (!huffmanEncoded) {
var size = Encoding.ASCII.GetChars(rawString, buffer);
var res = buffer.Slice(0, size);

newOffset = stringLength + offset;
// Direct byte-to-char widening (HPACK strings are ASCII)
for (var i = 0; i < rawString.Length; i++)
buffer[i] = (char) rawString[i];

return res;
return buffer.Slice(0, rawString.Length);
}

newOffset = stringLength + offset;

var decodedLength = _codec.GetDecodedLength(rawString);
// Upper bound for Huffman: shortest code is 5 bits, so max decoded = wireLen * 8/5 < wireLen * 2
var maxDecodedLength = stringLength * 2;

byte[]? heapBuffer = null;

var decodeBuffer = decodedLength < 1024
? stackalloc byte[decodedLength]
: heapBuffer = ArrayPool<byte>.Shared.Rent(decodedLength);
var decodeBuffer = maxDecodedLength < 1024
? stackalloc byte[maxDecodedLength]
: heapBuffer = ArrayPool<byte>.Shared.Rent(maxDecodedLength);

try {
var decoded = _codec.Decode(rawString, decodeBuffer);

var resultLength = Encoding.ASCII.GetChars(decoded, buffer);
// Direct byte-to-char widening (HPACK strings are ASCII)
for (var i = 0; i < decoded.Length; i++)
buffer[i] = (char) decoded[i];

return buffer.Slice(0, resultLength);
return buffer.Slice(0, decoded.Length);
}
finally {
if (heapBuffer != null)
Expand All @@ -178,8 +195,11 @@ public Span<byte> WriteString(ReadOnlySpan<char> input, Span<byte> buffer, bool
? stackalloc byte[input.Length * 2]
: heapBuffer = ArrayPool<byte>.Shared.Rent(input.Length * 2);

var size = Encoding.ASCII.GetBytes(input, inputByteBuffer);
var inputBytes = inputByteBuffer.Slice(0, size);
// Direct char-to-byte narrowing (HPACK strings are ASCII)
for (var i = 0; i < input.Length; i++)
inputByteBuffer[i] = (byte) input[i];

var inputBytes = inputByteBuffer.Slice(0, input.Length);

var encodedLength = _codec.GetEncodedLength(inputBytes);

Expand Down
100 changes: 42 additions & 58 deletions src/Fluxzy.Core/Clients/H2/Encoder/HPackDecoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,33 +49,6 @@ internal HPackDecoder(
public void Dispose()
{
}

/// <summary>
/// Decodes a header block field by field, appends every decoded field to
/// <paramref name="originalFields" />, and returns the result of
/// <c>Http11Parser.Write</c> over the decoded fields.
/// </summary>
/// <param name="headerContent">Encoded header block; consumed from the front as fields are read.</param>
/// <param name="buffer">Destination passed to <c>Http11Parser.Write</c> (presumably receives the HTTP/1.1 header text — confirm against Http11Parser).</param>
/// <param name="originalFields">List that receives each decoded field in read order. It is only appended to, never reassigned.</param>
/// <returns>The span returned by <c>Http11Parser.Write</c>.</returns>
public ReadOnlySpan<char> Decode(
ReadOnlySpan<byte> headerContent, Span<char> buffer,
ref IList<HeaderField> originalFields)
{
// _tempEntries is a reused instance-level list: start from a clean state.
_tempEntries.Clear();

try {
for (;;) {
var tableEntry = ReadNextField(headerContent, out var readen);

// A non-positive byte count from ReadNextField ends the loop.
if (readen <= 0)
break;

_tempEntries.Add(tableEntry);
originalFields.Add(tableEntry);

// Advance past the bytes consumed by the field just read.
headerContent = headerContent.Slice(readen);
}

return Http11Parser.Write(_tempEntries, buffer);
}
finally {
// Always empty the scratch list, including when decoding throws.
_tempEntries.Clear();
}
}

public ReadOnlySpan<char> Decode(ReadOnlySpan<byte> headerContent, Span<char> buffer)
{
_tempEntries.Clear();
Expand Down Expand Up @@ -166,41 +139,46 @@ private HeaderField ReadNextField(in ReadOnlySpan<byte> buffer, out int bytesRea
$"Requested headerIndex does not exist in static table {headerIndex}");
}

var stringLength = _primitiveOperation.GetStringLength(buffer.Slice(offsetLength));
var valPrefix = buffer.Slice(offsetLength);
_primitiveOperation.ReadStringPrefix(valPrefix, out var valWireLen, out var valIsHuffman);
var valCharBudget = valIsHuffman ? valWireLen * 2 : valWireLen;

var lineBuffer =
stringLength < _codecSetting.MaxStackAllocationLength
? stackalloc char[stringLength]
: new char[stringLength];
valCharBudget <= _codecSetting.MaxStackAllocationLength
? stackalloc char[valCharBudget]
: new char[valCharBudget];

var headerValue =
_primitiveOperation
.ReadString(buffer.Slice(offsetLength)
, lineBuffer, out var headerValueLength);
.ReadString(valPrefix, lineBuffer, out var headerValueLength);

bytesReaden = offsetLength + headerValueLength;

return Context.Register(header.Name.Span, headerValue);
}

case HeaderFieldType.LiteralHeaderFieldIncrementalIndexingWithName: {
var headerNameLength = _primitiveOperation.GetStringLength(buffer.Slice(1));
var nameSlice = buffer.Slice(1);
_primitiveOperation.ReadStringPrefix(nameSlice, out var nameWireLen, out var nameIsHuffman);
var nameCharBudget = nameIsHuffman ? nameWireLen * 2 : nameWireLen;

var headerNameBuffer =
headerNameLength < _codecSetting.MaxStackAllocationLength
? stackalloc char[headerNameLength]
: new char[headerNameLength];
nameCharBudget <= _codecSetting.MaxStackAllocationLength
? stackalloc char[nameCharBudget]
: new char[nameCharBudget];

var headerName =
_primitiveOperation.ReadString(buffer.Slice(1), headerNameBuffer, out var offsetHeaderName);
_primitiveOperation.ReadString(nameSlice, headerNameBuffer, out var offsetHeaderName);

var headerValueLength = _primitiveOperation.GetStringLength(buffer.Slice(1 + offsetHeaderName));
var valSlice = buffer.Slice(1 + offsetHeaderName);
_primitiveOperation.ReadStringPrefix(valSlice, out var valWireLen2, out var valIsHuffman2);
var valCharBudget2 = valIsHuffman2 ? valWireLen2 * 2 : valWireLen2;

var headerValueBuffer = headerValueLength < _codecSetting.MaxStackAllocationLength
? stackalloc char[headerValueLength]
: new char[headerValueLength];
var headerValueBuffer = valCharBudget2 <= _codecSetting.MaxStackAllocationLength
? stackalloc char[valCharBudget2]
: new char[valCharBudget2];

var headerValue = _primitiveOperation.ReadString(buffer.Slice(1 + offsetHeaderName),
var headerValue = _primitiveOperation.ReadString(valSlice,
headerValueBuffer, out var offsetHeaderValue);

bytesReaden = 1 + offsetHeaderName + offsetHeaderValue;
Expand All @@ -215,14 +193,16 @@ private HeaderField ReadNextField(in ReadOnlySpan<byte> buffer, out int bytesRea
if (!Context.TryGetEntry(index, out var tableEntry))
throw new HPackCodecException($"Referenced index header {index} is absent from decodingTable");

var resultStringLength = _primitiveOperation.GetStringLength(buffer.Slice(offsetLength));
var valSlice3 = buffer.Slice(offsetLength);
_primitiveOperation.ReadStringPrefix(valSlice3, out var valWireLen3, out var valIsHuffman3);
var valCharBudget3 = valIsHuffman3 ? valWireLen3 * 2 : valWireLen3;

var lineBuffer =
resultStringLength < _codecSetting.MaxStackAllocationLength
? stackalloc char[resultStringLength]
: new char[resultStringLength];
valCharBudget3 <= _codecSetting.MaxStackAllocationLength
? stackalloc char[valCharBudget3]
: new char[valCharBudget3];

var resultString = _primitiveOperation.ReadString(buffer.Slice(offsetLength), lineBuffer,
var resultString = _primitiveOperation.ReadString(valSlice3, lineBuffer,
out var offsetValueLength);

bytesReaden = offsetLength + offsetValueLength;
Expand All @@ -232,24 +212,28 @@ private HeaderField ReadNextField(in ReadOnlySpan<byte> buffer, out int bytesRea

case HeaderFieldType.LiteralHeaderFieldNeverIndexWithName:
case HeaderFieldType.LiteralHeaderFieldWithoutIndexingWithName: {
var headerNameLength = _primitiveOperation.GetStringLength(buffer.Slice(1));
var nameSlice4 = buffer.Slice(1);
_primitiveOperation.ReadStringPrefix(nameSlice4, out var nameWireLen4, out var nameIsHuffman4);
var nameCharBudget4 = nameIsHuffman4 ? nameWireLen4 * 2 : nameWireLen4;

var headerNameBuffer =
headerNameLength < _codecSetting.MaxStackAllocationLength
? stackalloc char[headerNameLength]
: new char[headerNameLength];
nameCharBudget4 <= _codecSetting.MaxStackAllocationLength
? stackalloc char[nameCharBudget4]
: new char[nameCharBudget4];

var headerName =
_primitiveOperation.ReadString(buffer.Slice(1), headerNameBuffer, out var nameLength);
_primitiveOperation.ReadString(nameSlice4, headerNameBuffer, out var nameLength);

var headerValueLength = _primitiveOperation.GetStringLength(buffer.Slice(1 + nameLength));
var valSlice4 = buffer.Slice(1 + nameLength);
_primitiveOperation.ReadStringPrefix(valSlice4, out var valWireLen4, out var valIsHuffman4);
var valCharBudget4 = valIsHuffman4 ? valWireLen4 * 2 : valWireLen4;

var headerValueBuffer =
headerValueLength < _codecSetting.MaxStackAllocationLength
? stackalloc char[headerValueLength]
: new char[headerValueLength];
valCharBudget4 <= _codecSetting.MaxStackAllocationLength
? stackalloc char[valCharBudget4]
: new char[valCharBudget4];

var headerValue = _primitiveOperation.ReadString(buffer.Slice(1 + nameLength), headerValueBuffer,
var headerValue = _primitiveOperation.ReadString(valSlice4, headerValueBuffer,
out var valueLength);

bytesReaden = 1 + nameLength + valueLength;
Expand Down
9 changes: 7 additions & 2 deletions src/Fluxzy.Core/Clients/H2/Encoder/HPackEncoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,13 @@ public ReadOnlySpan<byte> Encode(ReadOnlyMemory<char> headerContent, Span<byte>
{
var offset = 0;

foreach (var headerField in Http11Parser.Read(headerContent, isHttps)) {
offset += Encode(headerField, buffer.Slice(offset));
// Hot path: stream parse + encode in a single pass via a ref-struct enumerator,
// avoiding the List<HeaderField> and iterator-state-machine allocations that used
// to dominate the per-request cost.
var reader = new Http11HeaderReader(headerContent, isHttps);

while (reader.MoveNext()) {
offset += Encode(reader.Current, buffer.Slice(offset));
}

return buffer.Slice(0, offset);
Expand Down
Loading
Loading