Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions benchmarks/NPOI.Benchmarks/LargeExcelFileBenchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ public class LargeExcelFileBenchmark
private static MemoryStream _memoryStream;
private string _filePath;

// 36 MB .xlsx sourced from https://github.com/mini-software/MiniExcel/tree/master/benchmarks/MiniExcel.Benchmarks
// 1,000,000 rows × 10 cols, all cells are shared strings → uniqueCount=1,000,000
// Uncompressed sharedStrings.xml is ~31 MB, making SST the dominant parse cost.
private string _largeFileWithSstPath;

[GlobalSetup]
public void GlobalSetup()
{
Expand All @@ -37,6 +42,8 @@ public void GlobalSetup()

_loadedWorkBook = new XSSFWorkbook(copyPath);
_memoryStream = new MemoryStream();

_largeFileWithSstPath = Path.Combine("data", "Test1000000x10_SharingStrings.xlsx");
}

[Benchmark]
Expand All @@ -46,6 +53,35 @@ public void XSSFWorkbookLoad()
workbook.Dispose();
}

/// <summary>
/// Measures the bare open/close cost of the large shared-strings workbook:
/// the 36 MB file (its sharedStrings.xml inflates to roughly 31 MB and holds
/// 1,000,000 unique strings) is opened and then disposed straight away,
/// without touching a single cell.
/// Because the shared strings table is loaded lazily, it is never parsed here,
/// so the result is the floor for "open a workbook" overhead.
/// See <see cref="XSSFWorkbookLargeSstLoadStrings"/> for the counterpart that
/// forces the shared strings table to be parsed.
/// </summary>
[Benchmark]
public void XSSFWorkbookLargeSstOpenDispose()
{
    // NOTE(review): the second constructor argument is presumably the read-only flag — confirm.
    using (var openedWorkbook = new XSSFWorkbook(_largeFileWithSstPath, true))
    {
        // Intentionally empty: only construction and disposal are being measured.
    }
}

/// <summary>
/// Opens the same 36 MB workbook as the open/dispose benchmark, then reads
/// <see cref="NPOI.XSSF.Model.SharedStringsTable.Count"/> so that the shared
/// strings table is actually loaded before the workbook is disposed.
/// Serves as the reference point for the cost of parsing the shared strings:
/// with lazy loading and the streaming parser that cost is deferred until this
/// access and allocates less than the previous DOM-based path.
/// </summary>
[Benchmark]
public void XSSFWorkbookLargeSstLoadStrings()
{
    // NOTE(review): the second constructor argument is presumably the read-only flag — confirm.
    using (var openedWorkbook = new XSSFWorkbook(_largeFileWithSstPath, true))
    {
        var sharedStrings = openedWorkbook.GetSharedStringSource();

        // Reading Count is what triggers the shared strings parse; the value itself is discarded.
        _ = sharedStrings.Count;
    }
}

[Benchmark]
public void XSSFReaderLoad()
{
Expand Down
3 changes: 3 additions & 0 deletions benchmarks/NPOI.Benchmarks/NPOI.Benchmarks.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
<None Update="data\test-performance.xlsx">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="data\Test1000000x10_SharingStrings.xlsx">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
Binary file not shown.
Loading
Loading