From 599ff2413d676d7f1c48e9df8fb58568d3cd5ba6 Mon Sep 17 00:00:00 2001 From: Ken Smith Date: Tue, 24 Mar 2026 15:26:37 -0700 Subject: [PATCH] Restore sorted cell iteration order after SortedDictionary removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #1753 replaced SortedDictionary with Dictionary in XSSFRow for O(1) insert/lookup, but GetEnumerator() and Cells no longer returned cells in ascending column order — a breaking change for existing consumers. Add a lazily-built sorted cell cache (List) that is populated on first read and invalidated on mutation (add/remove/rebuild). This preserves the Dictionary performance win while restoring the sorted iteration contract. Applied consistently to XSSFRow, HSSFRow, and SXSSFRow. Co-Authored-By: Claude Opus 4.6 (1M context) --- main/HSSF/UserModel/HSSFRow.cs | 17 +++++++++-- ooxml/XSSF/Streaming/SXSSFRow.cs | 16 ++++++++-- ooxml/XSSF/UserModel/XSSFRow.cs | 52 +++++++++++++++++++++----------- 3 files changed, 62 insertions(+), 23 deletions(-) diff --git a/main/HSSF/UserModel/HSSFRow.cs b/main/HSSF/UserModel/HSSFRow.cs index f47fe6a0b..e3ad2211b 100644 --- a/main/HSSF/UserModel/HSSFRow.cs +++ b/main/HSSF/UserModel/HSSFRow.cs @@ -42,6 +42,7 @@ public class HSSFRow : IRow, IComparable private int rowNum; private Dictionary cells = new Dictionary(); + private List _sortedCellCache; /** * reference to low level representation @@ -177,7 +178,7 @@ private void RemoveCell(ICell cell, bool alsoRemoveRecords) ((HSSFCell)cell).NotifyArrayFormulaChanging(); } cells.Remove(column); - + _sortedCellCache = null; if (alsoRemoveRecords) { @@ -402,6 +403,7 @@ private void AddCell(ICell cell) // Array.Copy(oldCells, 0, cells, 0, oldCells.Length); //} cells[column] = cell; + _sortedCellCache = null; // fix up firstCol and lastCol indexes if (row.IsEmpty|| column < row.FirstCol) @@ -680,7 +682,7 @@ private short FindFirstCell(int firstcell) public List Cells { get { - return new List(this.cells.Values); + return new List(GetSortedCells()); } } @@ -720,7 +722,7 @@ public bool? Collapsed /// public IEnumerator GetEnumerator() { - return this.cells.Values.GetEnumerator(); + return GetSortedCells().GetEnumerator(); } /// @@ -789,5 +791,14 @@ public bool HasCustomHeight() { throw new NotImplementedException(); } + + private List GetSortedCells() + { + if (_sortedCellCache == null) + { + _sortedCellCache = cells.OrderBy(kv => kv.Key).Select(kv => kv.Value).ToList(); + } + return _sortedCellCache; + } } } \ No newline at end of file diff --git a/ooxml/XSSF/Streaming/SXSSFRow.cs b/ooxml/XSSF/Streaming/SXSSFRow.cs index 5116adf0e..d020cee2d 100644 --- a/ooxml/XSSF/Streaming/SXSSFRow.cs +++ b/ooxml/XSSF/Streaming/SXSSFRow.cs @@ -27,6 +27,7 @@ public class SXSSFRow : IRow, IComparable { private readonly SXSSFSheet _sheet; // parent sheet private readonly Dictionary _cells = new Dictionary(); + private List _sortedCellCache; private short _style = -1; // index of cell style in style table private bool _zHeight; // row zero-height (this is somehow different than being hidden) private float _height = -1; @@ -53,7 +54,7 @@ public virtual bool HasCustomHeight() public List Cells { - get { return _cells.Values.Select(cell => (ICell) cell).ToList(); } + get { return new List(GetSortedCells()); } } public short FirstCellNum @@ -235,6 +236,7 @@ public ICell CreateCell(int column, CellType type) CheckBounds(column); SXSSFCell cell = new SXSSFCell(this, type); _cells[column] = cell; + _sortedCellCache = null; UpdateIndexWhenAdd(column); return cell; } @@ -299,7 +301,7 @@ public ICell GetCell(int cellnum, MissingCellPolicy policy) } public IEnumerator GetEnumerator() { - return _cells.Values.GetEnumerator(); + return GetSortedCells().GetEnumerator(); } public void MoveCell(ICell cell, int newColumn) @@ -311,6 +313,7 @@ public void RemoveCell(ICell cell) { int index = GetCellIndex((SXSSFCell)cell); _cells.Remove(index); + _sortedCellCache = null; if (index == _firstCellNum) { InvalidateFirstCellNum(); @@ -373,6 +376,15 @@ IEnumerator IEnumerable.GetEnumerator() return this.GetEnumerator(); } + private List GetSortedCells() + { + if (_sortedCellCache == null) + { + _sortedCellCache = _cells.OrderBy(kv => kv.Key).Select(kv => (ICell)kv.Value).ToList(); + } + return _sortedCellCache; + } + /** * Create an iterator over the cells from [0, getLastCellNum()). * Includes blank cells, excludes empty cells diff --git a/ooxml/XSSF/UserModel/XSSFRow.cs b/ooxml/XSSF/UserModel/XSSFRow.cs index 6b6514a59..5d205a510 100644 --- a/ooxml/XSSF/UserModel/XSSFRow.cs +++ b/ooxml/XSSF/UserModel/XSSFRow.cs @@ -67,6 +67,12 @@ public class XSSFRow : IRow, IComparable /// Avoids O(n) LINQ Max() scan on every LastCellNum access. /// private int _cachedLastCellNum = -1; + + /// + /// Lazily-built list of cells sorted by column index. + /// Null when invalidated; rebuilt on first read access. + /// + private List _sortedCellCache; #endregion #region Public properties @@ -561,8 +567,8 @@ public CT_Row GetCTRow() /// internal void OnDocumentWrite() { - var orderedCells = _cells.OrderBy(kv => kv.Key).Select(kv => (XSSFCell)kv.Value).ToList(); - + var sortedCells = GetSortedCells(); + bool isOrdered = true; if (_row.SizeOfCArray() != _cells.Count) { @@ -571,9 +577,9 @@ internal void OnDocumentWrite() else { int i = 0; - foreach (XSSFCell cell in orderedCells) + foreach (ICell cell in sortedCells) { - CT_Cell c1 = cell.GetCTCell(); + CT_Cell c1 = ((XSSFCell)cell).GetCTCell(); CT_Cell c2 = _row.GetCArray(i++); string r1 = c1.r; @@ -590,7 +596,7 @@ internal void OnDocumentWrite() { CT_Cell[] cArray = new CT_Cell[_cells.Count]; int i = 0; - foreach (XSSFCell c in orderedCells) + foreach (XSSFCell c in sortedCells.Cast()) { cArray[i++] = c.GetCTCell(); } @@ -653,6 +659,7 @@ internal void RebuildCells() _row.c.Sort((col1, col2) => col1.r.CompareTo(col2.r)); // Cache is invalid after rebuild — keys may have changed + _sortedCellCache = null; _cachedFirstCellNum = -1; _cachedLastCellNum = -1; } @@ -660,21 +667,21 @@ internal void RebuildCells() #region IEnumerable and IComparable members /// - /// Cell iterator over the physically defined cell + /// Cell iterator over the physically defined cells, sorted by column index. /// - /// an iterator over cells in this row. - public Dictionary.ValueCollection.Enumerator CellIterator() + /// an iterator over cells in this row in ascending column order. + public IEnumerator CellIterator() { - return _cells.Values.GetEnumerator(); + return GetSortedCells().GetEnumerator(); } /// /// Alias for to allow foreach loops /// - /// an iterator over cells in this row. + /// an iterator over cells in this row in ascending column order. public IEnumerator GetEnumerator() { - return _cells.Values.GetEnumerator(); + return GetSortedCells().GetEnumerator(); } /// @@ -723,13 +730,7 @@ public List Cells { get { - List cells = new List(); - foreach (ICell cell in _cells.Values) - { - cells.Add(cell); - } - - return cells; + return new List(GetSortedCells()); } } @@ -836,11 +837,25 @@ private int GetLastKey() return _cells.Keys.Max(); } + /// + /// Returns cells sorted by column index, using a lazily-built cache. + /// The cache is invalidated on any cell mutation (add/remove/rebuild). + /// + private List GetSortedCells() + { + if (_sortedCellCache == null) + { + _sortedCellCache = _cells.OrderBy(kv => kv.Key).Select(kv => kv.Value).ToList(); + } + return _sortedCellCache; + } + /// /// Update cached min/max on cell addition. O(1) — just compare with current bounds. /// private void UpdateCacheOnAdd(int columnIndex) { + _sortedCellCache = null; if (_cachedFirstCellNum < 0 || columnIndex < _cachedFirstCellNum) { _cachedFirstCellNum = columnIndex; @@ -857,6 +872,7 @@ private void UpdateCacheOnAdd(int columnIndex) /// private void InvalidateCacheOnRemove(int removedIndex) { + _sortedCellCache = null; if (_cells.Count == 0) { _cachedFirstCellNum = -1;