Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions ETL-Abstractions.sln

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions benchmarks/.editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Analyzer rules relaxed for benchmark projects

[*.cs]

# AsyncFixer01: Remove async/await — benchmark components use async-iterator
# conventions and intentionally minimal awaits
dotnet_diagnostic.AsyncFixer01.severity = none

# MA0004: Use ConfigureAwait(false) — not needed in the benchmark harness
dotnet_diagnostic.MA0004.severity = none

# S1215: Remove use of GC.GetTotalMemory — required for memory benchmarks
dotnet_diagnostic.S1215.severity = none

# VSTHRD200: Use Async suffix — benchmark methods follow BenchmarkDotNet naming conventions
dotnet_diagnostic.VSTHRD200.severity = none
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;
using Wolfgang.Etl.Abstractions;

namespace Wolfgang.Etl.Abstractions.Benchmarks;

// ----------------------------------------------------------------------
// Base-class-derived components — exercise the ExtractorBase / TransformerBase
// / LoaderBase machinery (Interlocked counters, progress plumbing, the async
// iterator wrapper) over a purely in-memory sequence so the benchmark measures
// the abstraction overhead, not any I/O.
// ----------------------------------------------------------------------

/// <summary>
/// In-memory extractor derived from <see cref="ExtractorBase{TSource, TProgress}"/>
/// that emits the integers <c>0</c> through <c>count - 1</c> in ascending order.
/// </summary>
/// <remarks>
/// <c>IncrementCurrentItemCount()</c> is called once before each element is
/// yielded, so the base-class item counter is exercised for every item.
/// </remarks>
internal sealed class SequenceExtractor : ExtractorBase<int, Report>
{
    private readonly int _count;



    /// <summary>Creates an extractor that emits <paramref name="count"/> items.</summary>
    /// <param name="count">Number of items to emit; zero or a negative value emits nothing.</param>
    public SequenceExtractor(int count) => _count = count;



    /// <summary>Produces the sequence, bumping the item counter before each yield.</summary>
    /// <param name="token">Cancellation token for the enumeration; this worker never inspects it.</param>
    /// <returns>An async stream of <c>0</c> through <c>count - 1</c>.</returns>
    protected override async IAsyncEnumerable<int> ExtractWorkerAsync
    (
        [EnumeratorCancellation] CancellationToken token
    )
    {
        var next = 0;
        while (next < _count)
        {
            IncrementCurrentItemCount();
            yield return next;
            next++;
        }

        // The loop never awaits; this already-completed await is the method's
        // only await expression (presumably kept so the async iterator does
        // not trip the compiler's "async method lacks await" warning).
        await Task.CompletedTask;
    }



    /// <summary>Builds a progress snapshot from the current item count.</summary>
    /// <returns>A new <see cref="Report"/> constructed from <c>CurrentItemCount</c>.</returns>
    protected override Report CreateProgressReport() => new Report(CurrentItemCount);
}



/// <summary>
/// Transformer that forwards every incoming item unchanged, counting each one
/// as it passes through.
/// </summary>
internal sealed class PassThroughTransformer : TransformerBase<int, int, Report>
{
    /// <summary>Re-yields each element of <paramref name="items"/> as-is.</summary>
    /// <param name="items">Upstream sequence to forward.</param>
    /// <param name="token">Cancellation token attached to the upstream enumeration.</param>
    /// <returns>An async stream containing the same values in the same order.</returns>
    protected override async IAsyncEnumerable<int> TransformWorkerAsync
    (
        IAsyncEnumerable<int> items,
        [EnumeratorCancellation] CancellationToken token
    )
    {
        // Bind the token to the upstream sequence before enumerating it.
        var upstream = items.WithCancellation(token);

        await foreach (var value in upstream)
        {
            IncrementCurrentItemCount();
            yield return value;
        }
    }



    /// <summary>Builds a progress snapshot from the current item count.</summary>
    /// <returns>A new <see cref="Report"/> constructed from <c>CurrentItemCount</c>.</returns>
    protected override Report CreateProgressReport() => new Report(CurrentItemCount);
}



/// <summary>
/// Loader that consumes the whole incoming sequence and discards the values,
/// incrementing the item counter once per element.
/// </summary>
internal sealed class CountingLoader : LoaderBase<int, Report>
{
    /// <summary>Drains <paramref name="items"/> to completion.</summary>
    /// <param name="items">Sequence to consume.</param>
    /// <param name="token">Cancellation token attached to the enumeration.</param>
    /// <returns>A task that completes once the sequence has been fully enumerated.</returns>
    protected override async Task LoadWorkerAsync
    (
        IAsyncEnumerable<int> items,
        CancellationToken token
    )
    {
        // Bind the token to the incoming sequence before enumerating it.
        var incoming = items.WithCancellation(token);

        await foreach (var _ in incoming)
        {
            // The value itself is irrelevant; only the count is recorded.
            IncrementCurrentItemCount();
        }
    }



    /// <summary>Builds a progress snapshot from the current item count.</summary>
    /// <returns>A new <see cref="Report"/> constructed from <c>CurrentItemCount</c>.</returns>
    protected override Report CreateProgressReport() => new Report(CurrentItemCount);
}



// ----------------------------------------------------------------------
// Interface-only components — implement just the simplest (no-progress,
// no-cancellation) ETL interfaces so the fluent Pipeline composition resolves
// to its leanest path, isolating the cost of the Pipeline plumbing itself.
// ----------------------------------------------------------------------

/// <summary>
/// Bare-bones <see cref="IExtractAsync{TSource}"/> implementation that emits the
/// integers <c>0</c> through <c>count - 1</c> from memory.
/// </summary>
internal sealed class RangeExtractor : IExtractAsync<int>
{
    private readonly int _count;



    /// <summary>Creates an extractor that emits <paramref name="count"/> items.</summary>
    /// <param name="count">Number of items to emit; zero or a negative value emits nothing.</param>
    public RangeExtractor(int count) => _count = count;



    /// <summary>Yields the range in ascending order.</summary>
    /// <returns>An async stream of <c>0</c> through <c>count - 1</c>.</returns>
    public async IAsyncEnumerable<int> ExtractAsync()
    {
        var next = 0;
        while (next < _count)
        {
            yield return next;
            next++;
        }

        // The loop never awaits; this already-completed await is the method's
        // only await expression (presumably kept so the async iterator does
        // not trip the compiler's "async method lacks await" warning).
        await Task.CompletedTask;
    }
}



/// <summary>
/// Bare-bones <see cref="ITransformAsync{TSource, TDestination}"/> implementation
/// that hands every input element straight back to the consumer.
/// </summary>
internal sealed class IdentityTransformer : ITransformAsync<int, int>
{
    /// <summary>Re-yields each element of <paramref name="items"/> unchanged.</summary>
    /// <param name="items">Upstream sequence to forward.</param>
    /// <returns>An async stream containing the same values in the same order.</returns>
    public async IAsyncEnumerable<int> TransformAsync(IAsyncEnumerable<int> items)
    {
        await foreach (var element in items)
        {
            yield return element;
        }
    }
}



/// <summary>
/// Bare-bones <see cref="ILoadAsync{TDestination}"/> implementation that enumerates
/// the incoming sequence to the end and throws every value away.
/// </summary>
internal sealed class SinkLoader : ILoadAsync<int>
{
    /// <summary>Drains <paramref name="items"/> to completion.</summary>
    /// <param name="items">Sequence to consume.</param>
    /// <returns>A task that completes once the sequence has been fully enumerated.</returns>
    public async Task LoadAsync(IAsyncEnumerable<int> items)
    {
        await foreach (var _ in items)
        {
            // Intentionally empty: enumerating the sequence is the only work.
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
using System;
using System.Threading.Tasks;
using BenchmarkDotNet.Attributes;
using Wolfgang.Etl.Abstractions;

namespace Wolfgang.Etl.Abstractions.Benchmarks;

/// <summary>
/// Benchmarks the per-item cost that <see cref="ExtractorBase{TSource, TProgress}"/>
/// layers over a plain in-memory sequence: the async-iterator wrapper, the
/// <c>Interlocked</c>-based item counter, and the progress path (timer creation
/// plus a final progress report).
/// </summary>
[MemoryDiagnoser]
public class ExtractorBenchmarks
{
    /// <summary>Number of items each benchmark invocation extracts.</summary>
    [Params(1_000, 100_000)]
    public int RecordCount { get; set; }



    /// <summary>Baseline: enumerates the extractor without a progress sink.</summary>
    /// <returns>The number of items observed by the consumer.</returns>
    [Benchmark(Baseline = true)]
    public async Task<int> Extract_NoProgress()
    {
        var source = new SequenceExtractor(RecordCount);
        var consumed = 0;

        await foreach (var _ in source.ExtractAsync())
        {
            consumed += 1;
        }

        return consumed;
    }



    /// <summary>Enumerates the extractor with a no-op progress sink attached.</summary>
    /// <returns>The number of items observed by the consumer.</returns>
    [Benchmark]
    public async Task<int> Extract_WithProgress()
    {
        var source = new SequenceExtractor(RecordCount);
        IProgress<Report> progress = new Sink();
        var consumed = 0;

        await foreach (var _ in source.ExtractAsync(progress))
        {
            consumed += 1;
        }

        return consumed;
    }



    /// <summary>
    /// Progress sink whose <see cref="Report(Report)"/> does nothing and runs
    /// synchronously. Used instead of <c>System.Progress&lt;T&gt;</c> so that its
    /// SynchronizationContext posting does not show up as benchmark noise.
    /// </summary>
    private sealed class Sink : IProgress<Report>
    {
        /// <summary>Discards the supplied progress value.</summary>
        /// <param name="value">Progress snapshot; ignored.</param>
        public void Report(Report value)
        {
            // Deliberately empty.
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
using System.Threading.Tasks;
using BenchmarkDotNet.Attributes;
using Wolfgang.Etl.Abstractions;

namespace Wolfgang.Etl.Abstractions.Benchmarks;

/// <summary>
/// Measures the cost of composing and running a full Extract → Transform → Load
/// pipeline. Compares the fluent <see cref="Pipeline"/> builder against hand-wired
/// <see cref="System.Collections.Generic.IAsyncEnumerable{T}"/> composition over the
/// same in-memory stages, so the delta is the builder's plumbing overhead (it should
/// be negligible — the builder is documented as zero-extra-allocation sugar over the
/// same composition).
/// </summary>
/// <remarks>
/// The <c>IAsyncEnumerable</c> cref is fully qualified because this file does not
/// import <c>System.Collections.Generic</c>.
/// NOTE(review): <see cref="FluentPipeline"/> is the baseline, so ratio columns read
/// relative to the builder rather than to the hand-wired composition — confirm that
/// is the intended direction.
/// </remarks>
[MemoryDiagnoser]
public class PipelineBenchmarks
{
    /// <summary>Number of items pushed through the pipeline per invocation.</summary>
    [Params(1_000, 100_000)]
    public int RecordCount { get; set; }



    /// <summary>
    /// Baseline: builds and runs the pipeline through the fluent
    /// <see cref="Pipeline"/> API using the interface-only stages.
    /// </summary>
    /// <returns>A task that completes when the pipeline run has finished.</returns>
    [Benchmark(Baseline = true)]
    public async Task FluentPipeline()
    {
        await Pipeline
            .Extract(new RangeExtractor(RecordCount))
            .Transform(new IdentityTransformer())
            .Load(new SinkLoader())
            .RunAsync();
    }



    /// <summary>
    /// Runs the same interface-only stages wired together by hand, without the
    /// fluent builder.
    /// </summary>
    /// <returns>A task that completes when the loader has drained the sequence.</returns>
    [Benchmark]
    public async Task ManualComposition()
    {
        var extractor = new RangeExtractor(RecordCount);
        var transformer = new IdentityTransformer();
        var loader = new SinkLoader();

        await loader.LoadAsync(transformer.TransformAsync(extractor.ExtractAsync()));
    }



    /// <summary>
    /// Same Extract -> Transform -> Load shape, but wired with the base-class
    /// components (ExtractorBase / TransformerBase / LoaderBase) so the result
    /// captures the full abstraction overhead — Interlocked item counting and
    /// the async-iterator wrappers on every stage.
    /// </summary>
    /// <returns>A task that completes when the loader has drained the sequence.</returns>
    [Benchmark]
    public async Task BaseClassComposition()
    {
        var extractor = new SequenceExtractor(RecordCount);
        var transformer = new PassThroughTransformer();
        var loader = new CountingLoader();

        await loader.LoadAsync(transformer.TransformAsync(extractor.ExtractAsync()));
    }
}
5 changes: 5 additions & 0 deletions benchmarks/Wolfgang.Etl.Abstractions.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
using BenchmarkDotNet.Running;

// Entry point: hand every benchmark class found in this assembly to
// BenchmarkDotNet's switcher, forwarding the command-line arguments as-is.
var benchmarkAssembly = typeof(Program).Assembly;

BenchmarkSwitcher.FromAssembly(benchmarkAssembly).Run(args);
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<!-- Benchmark project: a console executable that references the abstractions
library and BenchmarkDotNet. -->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<!-- Built as an executable; the entry point hands control to BenchmarkDotNet. -->
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>latest</LangVersion>
<!-- Implicit usings are off, so every source file must list its own using directives. -->
<ImplicitUsings>disable</ImplicitUsings>
</PropertyGroup>

<ItemGroup>
<!-- The library under measurement, referenced as a project rather than a package. -->
<ProjectReference Include="..\..\src\Wolfgang.Etl.Abstractions\Wolfgang.Etl.Abstractions.csproj" />
</ItemGroup>

<ItemGroup>
<!-- Benchmark harness. -->
<PackageReference Include="BenchmarkDotNet" Version="0.15.8" />
</ItemGroup>

<!-- Analyzer PackageReferences are centralized in Directory.Build.props -->

</Project>
Loading