diff --git a/Directory.Packages.props b/Directory.Packages.props
index 9e20d6f26..c6bc5d70b 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -14,6 +14,7 @@
+
@@ -26,4 +27,4 @@
-
+
\ No newline at end of file
diff --git a/main/HSSF/Extractor/ExcelExtractor.cs b/main/HSSF/Extractor/ExcelExtractor.cs
index 9e9b92097..2f3cf5d2f 100644
--- a/main/HSSF/Extractor/ExcelExtractor.cs
+++ b/main/HSSF/Extractor/ExcelExtractor.cs
@@ -42,7 +42,7 @@ public class ExcelExtractor : POIOLE2TextExtractor, IExcelExtractor
private bool formulasNotResults = false;
private bool includeCellComments = false;
private bool includeBlankCells = false;
- private bool includeHeaderFooter = true;
+ private bool includeHeadersFooters = true;
///
/// Initializes a new instance of the class.
///
@@ -65,13 +65,13 @@ public ExcelExtractor(POIFSFileSystem fs)
///
/// Should header and footer be included? Default is true
///
- public bool IncludeHeaderFooter
+ public bool IncludeHeadersFooters
{
get {
- return this.includeHeaderFooter;
+ return this.includeHeadersFooters;
}
set {
- this.includeHeaderFooter = value;
+ this.includeHeadersFooters = value;
}
}
///
@@ -137,6 +137,8 @@ public bool IncludeBlankCells
}
}
+ public bool AddTabEachEmptyCell { get; set; }
+
///
/// Retreives the text contents of the file
///
@@ -168,7 +170,7 @@ public override String Text
}
// Header text, if there is any
- if (sheet.Header != null && includeHeaderFooter)
+ if (sheet.Header != null && includeHeadersFooters)
{
text.Append(
ExtractHeaderFooter(sheet.Header)
@@ -289,7 +291,7 @@ public override String Text
}
// Finally Feader text, if there is any
- if (sheet.Footer != null && includeHeaderFooter)
+ if (sheet.Footer != null && includeHeadersFooters)
{
text.Append(
ExtractHeaderFooter(sheet.Footer)
diff --git a/main/SS/Extractor/ExcelExtractor.cs b/main/SS/Extractor/ExcelExtractor.cs
index 9cfb8ec40..91c298abd 100644
--- a/main/SS/Extractor/ExcelExtractor.cs
+++ b/main/SS/Extractor/ExcelExtractor.cs
@@ -27,7 +27,9 @@ public interface IExcelExtractor
bool IncludeCellComments { get; set; }
bool IncludeSheetNames { get; set; }
bool FormulasNotResults { get; set; }
- bool IncludeHeaderFooter { get; set; }
+ bool IncludeHeadersFooters { get; set; }
+ //Add a tab delimiter for each empty cell.
+ bool AddTabEachEmptyCell { get; set; }
/**
* Retreives the text contents of the file
*/
diff --git a/ooxml/NPOI.OOXML.Core.csproj b/ooxml/NPOI.OOXML.Core.csproj
index 08eca7f48..fb7a63b6a 100644
--- a/ooxml/NPOI.OOXML.Core.csproj
+++ b/ooxml/NPOI.OOXML.Core.csproj
@@ -13,7 +13,6 @@
-
@@ -26,6 +25,7 @@
+
diff --git a/ooxml/POIXMLTextExtractor.cs b/ooxml/POIXMLTextExtractor.cs
index ec85b072f..ecfbcaf78 100644
--- a/ooxml/POIXMLTextExtractor.cs
+++ b/ooxml/POIXMLTextExtractor.cs
@@ -39,21 +39,21 @@ public POIXMLTextExtractor(POIXMLDocument document)
/**
* Returns the core document properties
*/
- public CoreProperties GetCoreProperties()
+ public virtual CoreProperties GetCoreProperties()
{
return _document.GetProperties().CoreProperties;
}
/**
* Returns the extended document properties
*/
- public ExtendedProperties GetExtendedProperties()
+ public virtual ExtendedProperties GetExtendedProperties()
{
return _document.GetProperties().ExtendedProperties;
}
/**
* Returns the custom document properties
*/
- public CustomProperties GetCustomProperties()
+ public virtual CustomProperties GetCustomProperties()
{
return _document.GetProperties().CustomProperties;
}
diff --git a/ooxml/SS/Converter/ExcelToHtmlConverter.cs b/ooxml/SS/Converter/ExcelToHtmlConverter.cs
index e262b11f3..ed259c4a2 100644
--- a/ooxml/SS/Converter/ExcelToHtmlConverter.cs
+++ b/ooxml/SS/Converter/ExcelToHtmlConverter.cs
@@ -811,7 +811,7 @@ private static void BuildStyle_Border(IWorkbook workbook, StringBuilder style, S
var stylesSource = ((XSSFWorkbook) workbook).GetStylesSource();
if (stylesSource != null)
{
- var theme = stylesSource.GetTheme();
+ var theme = stylesSource.Theme;
if (theme != null)
color = theme.GetThemeColor(borderColor);
}
@@ -853,9 +853,9 @@ private static void BuildStyle_Font(IWorkbook workbook, StringBuilder style, IFo
{
StylesTable st = ((XSSFWorkbook)workbook).GetStylesSource();
XSSFColor fontColor = null;
- if (st != null && st.GetTheme() != null)
+ if (st != null && st.Theme != null)
{
- fontColor = st.GetTheme().GetThemeColor(font.Color);
+ fontColor = st.Theme.GetThemeColor(font.Color);
}
else
{
diff --git a/ooxml/XSSF/EventUserModel/ReadOnlySharedStringsTable.cs b/ooxml/XSSF/EventUserModel/ReadOnlySharedStringsTable.cs
new file mode 100644
index 000000000..66e13e8e3
--- /dev/null
+++ b/ooxml/XSSF/EventUserModel/ReadOnlySharedStringsTable.cs
@@ -0,0 +1,308 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+namespace NPOI.XSSF.EventUserModel
+{
+ using NPOI.OpenXml4Net.OPC;
+ using NPOI.XSSF.UserModel;
+ using System;
+ using System.Collections.Generic;
+ using System.IO;
+ using System.Text;
+ using NSAX;
+ using NSAX.Helpers;
+
+ ///
+ ///
+ ///
+ ///
+ /// This is a lightweight way to process the Shared Strings
+ /// table. Most of the text cells will reference something
+ /// from in here.
+ ///
+ ///
+ /// Note that each SI entry can have multiple T elements, if the
+ /// string is made up of bits with different formatting.
+ ///
+ ///
+ /// Example input:
+ ///
+ /// <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+ /// <sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="2" uniqueCount="2">
+ /// <si>
+ /// <r>
+ /// <rPr>
+ /// <b />
+ /// <sz val="11" />
+ /// <color theme="1" />
+ /// <rFont val="Calibri" />
+ /// <family val="2" />
+ /// <scheme val="minor" />
+ /// </rPr>
+ /// <t>This:</t>
+ /// </r>
+ /// <r>
+ /// <rPr>
+ /// <sz val="11" />
+ /// <color theme="1" />
+ /// <rFont val="Calibri" />
+ /// <family val="2" />
+ /// <scheme val="minor" />
+ /// </rPr>
+ /// <t xml:space="preserve">Causes Problems</t>
+ /// </r>
+ /// </si>
+ /// <si>
+ /// <t>This does not</t>
+ /// </si>
+ /// </sst>
+ ///
+ ///
+ ///
+ public class ReadOnlySharedStringsTable : DefaultHandler
+ {
+
+ private bool includePhoneticRuns;
+ ///
+ /// An integer representing the total count of strings in the workbook. This count does not
+ /// include any numbers, it counts only the total of text strings in the workbook.
+ ///
+ private int count;
+
+ ///
+ /// An integer representing the total count of unique strings in the Shared String Table.
+ /// A string is unique even if it is a copy of another string, but has different formatting applied
+ /// at the character level.
+ ///
+ private int uniqueCount;
+
+ ///
+ /// The shared strings table.
+ ///
+ private List strings;
+
+ ///
+ /// Map of phonetic strings (if they exist) indexed
+ /// with the integer matching the index in strings
+ ///
+ private Dictionary phoneticStrings;
+
+ /// <summary>
+ /// Calls <see cref="ReadOnlySharedStringsTable(OPCPackage, bool)"/> with
+ /// a value of <c>true</c> for including phonetic runs
+ /// </summary>
+ /// <param name="pkg">The <see cref="OPCPackage"/> to use as basis for the shared-strings table.</param>
+ /// <exception cref="IOException">If reading the data from the package fails.</exception>
+ /// <exception cref="SAXException">if parsing the XML data fails.</exception>
+ public ReadOnlySharedStringsTable(OPCPackage pkg)
+ : this(pkg, true)
+ {
+ }
+
+ /// <summary>
+ /// Builds the table from the first shared-strings part of the package, if there is one.
+ /// When the package has no such part nothing is parsed.
+ /// </summary>
+ /// <param name="pkg">The <see cref="OPCPackage"/> to use as basis for the shared-strings table.</param>
+ /// <param name="includePhoneticRuns">whether or not to concatenate phoneticRuns onto the shared string</param>
+ /// <exception cref="IOException">If reading the data from the package fails.</exception>
+ /// <exception cref="SAXException">if parsing the XML data fails.</exception>
+ /// <remarks>@since POI 3.14-Beta3</remarks>
+ public ReadOnlySharedStringsTable(OPCPackage pkg, bool includePhoneticRuns)
+ {
+     this.includePhoneticRuns = includePhoneticRuns;
+     var parts = pkg.GetPartsByContentType(XSSFRelation.SHARED_STRINGS.ContentType);
+
+     // Some workbooks have no shared strings table.
+     if (parts.Count > 0)
+     {
+         // Parsing is synchronous, so the part stream can be released as soon as
+         // ReadFrom returns instead of being left open.
+         using (Stream sstStream = parts[0].GetInputStream())
+         {
+             ReadFrom(sstStream);
+         }
+     }
+ }
+
+ /// <summary>
+ /// <para>
+ /// Like POIXMLDocumentPart constructor
+ /// </para>
+ /// <para>
+ /// Calls <see cref="ReadOnlySharedStringsTable(PackagePart, bool)"/>, with a
+ /// value of <c>true</c> to include phonetic runs.
+ /// </para>
+ /// </summary>
+ /// <remarks>@since POI 3.14-Beta1</remarks>
+ public ReadOnlySharedStringsTable(PackagePart part)
+ : this(part, true)
+ {
+ }
+
+ /// <summary>
+ /// Like POIXMLDocumentPart constructor: parses the shared strings directly from the given part.
+ /// </summary>
+ /// <param name="part">The package part whose input stream holds the shared-strings XML.</param>
+ /// <param name="includePhoneticRuns">whether or not to concatenate phonetic runs onto the shared string</param>
+ /// <remarks>@since POI 3.14-Beta3</remarks>
+ public ReadOnlySharedStringsTable(PackagePart part, bool includePhoneticRuns)
+ {
+     this.includePhoneticRuns = includePhoneticRuns;
+
+     // Parsing is synchronous, so release the part stream once ReadFrom has returned.
+     using (Stream partStream = part.GetInputStream())
+     {
+         ReadFrom(partStream);
+     }
+ }
+
+ /// <summary>
+ /// Read this shared strings table from an XML file.
+ /// An empty seekable stream is treated as "no shared strings" and is not parsed.
+ /// The stream is not closed here; that stays the caller's responsibility.
+ /// </summary>
+ /// <param name="is1">The input stream containing the XML document.</param>
+ /// <exception cref="IOException">if an error occurs while reading.</exception>
+ /// <exception cref="SAXException">if parsing the XML data fails.</exception>
+ public void ReadFrom(Stream is1)
+ {
+     // Stream.Length throws NotSupportedException on non-seekable streams, so the
+     // emptiness probe is only made when the stream can seek. A non-seekable stream
+     // is handed to the parser as-is.
+     if (is1.CanSeek && is1.Length == 0)
+     {
+         return;
+     }
+
+     InputSource sheetSource = new InputSource(is1);
+     NSAX.AElfred.SAXDriver sheetParser = new NSAX.AElfred.SAXDriver();
+     sheetParser.ContentHandler = this;
+     sheetParser.Parse(sheetSource);
+ }
+
+ /// <summary>
+ /// The value of the <c>count</c> attribute read from the <c>sst</c> element: the total
+ /// number of text strings in the workbook. Numbers are not included in this total.
+ /// </summary>
+ /// <returns>the total count of strings in the workbook</returns>
+ public int Count
+ {
+     get { return count; }
+ }
+
+ /// <summary>
+ /// The value of the <c>uniqueCount</c> attribute read from the <c>sst</c> element: the
+ /// number of unique strings in the Shared String Table. A string counts as unique even
+ /// if it is a copy of another string but has different character-level formatting.
+ /// </summary>
+ /// <returns>the total count of unique strings in the workbook</returns>
+ public int UniqueCount
+ {
+     get { return uniqueCount; }
+ }
+
+ /// <summary>
+ /// Looks up one shared string by its position in the table.
+ /// Only the text is returned; formatting is ignored.
+ /// </summary>
+ /// <param name="idx">index of item to return.</param>
+ /// <returns>the item at the specified position in this Shared String table.</returns>
+ public String GetEntryAt(int idx) => strings[idx];
+
+ /// <summary>
+ /// Exposes the backing list of parsed shared strings itself (not a copy).
+ /// The list is only created once an <c>sst</c> element has been parsed.
+ /// </summary>
+ public List Items => strings;
+
+
+ //// ContentHandler methods ////
+
+ private StringBuilder characters;
+ private bool tIsOpen;
+ private bool inRPh;
+
+ /// <summary>
+ /// SAX callback for an opening tag. Elements outside the SpreadsheetML namespace are ignored.
+ /// <c>sst</c> reads the counts and allocates the tables, <c>si</c> starts a new string,
+ /// <c>t</c> opens text capture and <c>rPh</c> marks the start of a phonetic run.
+ /// </summary>
+ /// <param name="uri">namespace URI of the element; <c>null</c> is accepted</param>
+ /// <param name="localName">local name of the element</param>
+ /// <param name="name">qualified name of the element (unused)</param>
+ /// <param name="attributes">attributes of the element</param>
+ public override void StartElement(String uri, String localName, String name,
+     IAttributes attributes)
+ {
+     if (uri != null && !uri.Equals(XSSFRelation.NS_SPREADSHEETML))
+     {
+         return;
+     }
+
+     if ("sst".Equals(localName))
+     {
+         // The attribute values are machine-written integers, so parse them culture-independently.
+         String count = attributes.GetValue("count");
+         if (count != null)
+         {
+             this.count = Int32.Parse(count, System.Globalization.CultureInfo.InvariantCulture);
+         }
+         String uniqueCount = attributes.GetValue("uniqueCount");
+         if (uniqueCount != null)
+         {
+             this.uniqueCount = Int32.Parse(uniqueCount, System.Globalization.CultureInfo.InvariantCulture);
+         }
+
+         // uniqueCount comes straight from the file and is only a capacity hint here.
+         // Clamp it so a negative value cannot throw and a bogus huge value cannot
+         // force a giant up-front allocation; the list still grows as needed.
+         const int maxInitialCapacity = 100000;
+         int capacityHint = Math.Max(0, Math.Min(this.uniqueCount, maxInitialCapacity));
+         this.strings = new List<String>(capacityHint);
+         this.phoneticStrings = new Dictionary<int, String>();
+         characters = new StringBuilder();
+     }
+     else if ("si".Equals(localName))
+     {
+         // New string item: drop whatever the previous item accumulated.
+         characters.Length = 0;
+     }
+     else if ("t".Equals(localName))
+     {
+         tIsOpen = true;
+     }
+     else if ("rPh".Equals(localName))
+     {
+         inRPh = true;
+         //append space...this assumes that rPh always comes After regular <t>
+         if (includePhoneticRuns && characters.Length > 0)
+         {
+             characters.Append(" ");
+         }
+     }
+ }
+
+ /// <summary>
+ /// SAX callback for a closing tag. Closing <c>si</c> stores the accumulated text as the
+ /// next shared string; closing <c>t</c> or <c>rPh</c> clears the matching state flag.
+ /// Elements outside the SpreadsheetML namespace are ignored.
+ /// </summary>
+ public override void EndElement(String uri, String localName, String name)
+ {
+     bool foreignNamespace = uri != null && !uri.Equals(XSSFRelation.NS_SPREADSHEETML);
+     if (foreignNamespace)
+     {
+         return;
+     }
+
+     switch (localName)
+     {
+         case "si":
+             strings.Add(characters.ToString());
+             break;
+         case "t":
+             tIsOpen = false;
+             break;
+         case "rPh":
+             inRPh = false;
+             break;
+     }
+ }
+
+ /// <summary>
+ /// Captures characters only if a t(ext) element is open. Text inside a phonetic run
+ /// is captured only when phonetic runs were requested.
+ /// </summary>
+ public override void Characters(char[] ch, int start, int length)
+ {
+     if (!tIsOpen)
+     {
+         return;
+     }
+
+     bool wanted = !inRPh || includePhoneticRuns;
+     if (wanted)
+     {
+         characters.Append(ch, start, length);
+     }
+ }
+ }
+}
+
diff --git a/ooxml/XSSF/EventUserModel/XSSFReader.cs b/ooxml/XSSF/EventUserModel/XSSFReader.cs
new file mode 100644
index 000000000..cc27d17fd
--- /dev/null
+++ b/ooxml/XSSF/EventUserModel/XSSFReader.cs
@@ -0,0 +1,502 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace NPOI.XSSF.EventUserModel
+{
+
+ using NPOI;
+ using NPOI.OpenXml4Net.Exceptions;
+ using NPOI.OpenXml4Net.OPC;
+ using NPOI.Util;
+ using NPOI.XSSF.Model;
+ using NPOI.XSSF.UserModel;
+ using NSAX;
+ using NSAX.Helpers;
+ using System.Xml;
+
+ ///
+ /// This class makes it easy to Get at individual parts
+ /// of an OOXML .xlsx file, suitable for low memory sax
+ /// parsing or similar.
+ /// It makes up the core part of the EventUserModel support
+ /// for XSSF.
+ ///
+ public class XSSFReader
+ {
+
+ private static ISet WORKSHEET_RELS =
+ new HashSet(
+ Arrays.AsList(new String[]{
+ XSSFRelation.WORKSHEET.Relation,
+ XSSFRelation.CHARTSHEET.Relation,
+ })
+ );
+ //private static POILogger LOGGER = POILogFactory.GetLogger(XSSFReader.class);
+
+ protected OPCPackage pkg;
+ protected PackagePart workbookPart;
+
+ /// <summary>
+ /// Creates a new XSSFReader, for the given package, and locates the workbook part
+ /// through the package's core-document relationship.
+ /// </summary>
+ /// <param name="pkg">the package to read from</param>
+ public XSSFReader(OPCPackage pkg)
+ {
+     this.pkg = pkg;
+
+     PackageRelationship coreRel = this.pkg
+         .GetRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT)
+         .GetRelationship(0);
+
+     if (coreRel != null)
+     {
+         // Get the part that holds the workbook
+         workbookPart = this.pkg.GetPart(coreRel);
+         return;
+     }
+
+     // strict OOXML likely not fully supported, see #57699
+     // this code is similar to POIXMLDocumentPart.PartFromOPCPackage, but I could not combine it
+     // easily due to different return values
+     PackageRelationship strictRel = this.pkg
+         .GetRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT)
+         .GetRelationship(0);
+     if (strictRel != null)
+     {
+         throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699");
+     }
+
+     throw new POIXMLException("OOXML file structure broken/invalid - no core document found!");
+ }
+
+
+ /// <summary>
+ /// Opens up the Shared Strings Table, parses it, and
+ /// returns a handy object for working with
+ /// shared strings. Returns <c>null</c> when the package has no shared-strings part.
+ /// </summary>
+ public SharedStringsTable SharedStringsTable
+ {
+     get
+     {
+         var sstParts = pkg.GetPartsByContentType(XSSFRelation.SHARED_STRINGS.ContentType);
+         if (sstParts.Count == 0)
+         {
+             return null;
+         }
+         return new SharedStringsTable(sstParts[0]);
+     }
+ }
+
+ /// <summary>
+ /// Opens up the Styles Table, parses it, and
+ /// returns a handy object for working with cell styles.
+ /// Returns <c>null</c> when the package has no styles part.
+ /// </summary>
+ public StylesTable StylesTable
+ {
+     get
+     {
+         var styleParts = pkg.GetPartsByContentType(XSSFRelation.STYLES.ContentType);
+         if (styleParts.Count == 0)
+         {
+             return null;
+         }
+
+         // Create the Styles Table, and associate the Themes if present
+         StylesTable styles = new StylesTable(styleParts[0]);
+         var themeParts = pkg.GetPartsByContentType(XSSFRelation.THEME.ContentType);
+         if (themeParts.Count > 0)
+         {
+             styles.Theme = new ThemesTable(themeParts[0]);
+         }
+         return styles;
+     }
+ }
+
+
+ /// <summary>
+ /// Returns an InputStream to read the contents of the
+ /// shared strings table.
+ /// </summary>
+ public Stream SharedStringsData
+ {
+     get { return XSSFRelation.SHARED_STRINGS.GetContents(workbookPart); }
+ }
+
+ /// <summary>
+ /// Returns an InputStream to read the contents of the
+ /// styles table.
+ /// </summary>
+ public Stream StylesData
+ {
+     get { return XSSFRelation.STYLES.GetContents(workbookPart); }
+ }
+
+ /// <summary>
+ /// Returns an InputStream to read the contents of the
+ /// themes table.
+ /// </summary>
+ public Stream ThemesData
+ {
+     get { return XSSFRelation.THEME.GetContents(workbookPart); }
+ }
+
+ /// <summary>
+ /// Returns an InputStream to read the contents of the
+ /// main Workbook, which contains key overall data for
+ /// the file, including sheet definitions.
+ /// </summary>
+ public Stream WorkbookData
+ {
+     get { return workbookPart.GetInputStream(); }
+ }
+
+ /// <summary>
+ /// Returns an InputStream to read the contents of the
+ /// specified Sheet.
+ /// </summary>
+ /// <param name="relId">The relationId of the sheet, from a r:id on the workbook</param>
+ /// <exception cref="ArgumentException">
+ /// when the workbook has no relationship with that id, or the relationship's target part is missing
+ /// </exception>
+ public Stream GetSheet(String relId)
+ {
+     PackageRelationship sheetRel = workbookPart.GetRelationship(relId);
+     if (sheetRel == null)
+     {
+         throw new ArgumentException("No Sheet found with r:id " + relId);
+     }
+
+     PackagePart sheetPart = pkg.GetPart(PackagingUriHelper.CreatePartName(sheetRel.TargetUri));
+     if (sheetPart == null)
+     {
+         throw new ArgumentException("No data found for Sheet with r:id " + relId);
+     }
+
+     return sheetPart.GetInputStream();
+ }
+
+ /// <summary>
+ /// Returns an Iterator which will let you Get at all the
+ /// different Sheets in turn.
+ /// Each sheet's InputStream is only opened when fetched
+ /// from the Iterator. It's up to you to close the
+ /// InputStreams when done with each one.
+ /// </summary>
+ public IEnumerator GetSheetsData()
+ {
+
+ return new SheetIterator(workbookPart);
+ }
+
+ ///
+ /// Iterator over sheet data.
+ ///
+ public class SheetIterator : IEnumerator
+ {
+
+ ///
+ /// Maps relId and the corresponding PackagePart
+ ///
+ private Dictionary sheetMap;
+
+ ///
+ /// Current sheet reference
+ ///
+ XSSFSheetRef xssfSheetRef;
+
+ ///
+ /// Iterator over CTSheet objects, returns sheets in logical order.
+ /// We can't rely on OpenXml4Net's relationship iterator because it returns objects in physical order,
+ /// i.e. as they are stored in the underlying package
+ ///
+ IEnumerator sheetIterator;
+
+
+ /// <summary>
+ /// Construct a new SheetIterator. Builds the relationship-id to part map for all
+ /// sheet-like relationships of the workbook part, then prepares an enumerator over
+ /// the sheet references read from the workbook part.
+ /// </summary>
+ /// <param name="wb">package part holding workbook.xml</param>
+ internal SheetIterator(PackagePart wb)
+ {
+ /*
+ * The order of sheets is defined by the order of CTSheet elements in workbook.xml
+ */
+ try
+ {
+ //step 1. Map sheet's relationship Id and the corresponding PackagePart
+ sheetMap = new Dictionary();
+ OPCPackage pkg = wb.Package;
+ ISet worksheetRels = SheetRelationships;
+ foreach (PackageRelationship rel in wb.Relationships)
+ {
+ String relType = rel.RelationshipType;
+ if (worksheetRels.Contains(relType))
+ {
+ PackagePartName relName = PackagingUriHelper.CreatePartName(rel.TargetUri);
+ sheetMap.Add(rel.Id, pkg.GetPart(relName));
+ }
+ }
+ //step 2. Read the sheet references from the workbook part (a list, in the
+ //order the parser reported them) and construct an enumerator over it
+ sheetIterator = CreateSheetIteratorFromWB(wb).GetEnumerator();
+ }
+ catch (InvalidFormatException e)
+ {
+ // surface malformed part names / relationships as the library's own exception type
+ throw new POIXMLException(e);
+ }
+ }
+
+ /// <summary>
+ /// Parses the workbook part and returns its sheet references in the order the parser
+ /// reported them. References without a relationship id are skipped.
+ /// </summary>
+ /// <param name="wb">package part holding workbook.xml</param>
+ /// <returns>the sheet references that carry a non-empty relationship id</returns>
+ /// <exception cref="POIXMLException">when the SAX parser reports an error</exception>
+ static List<XSSFSheetRef> CreateSheetIteratorFromWB(PackagePart wb)
+ {
+     XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader();
+     NSAX.AElfred.SAXDriver xmlReader;
+     try
+     {
+         xmlReader = new NSAX.AElfred.SAXDriver();
+     }
+     catch (SAXException e)
+     {
+         throw new POIXMLException(e);
+     }
+     xmlReader.ContentHandler = xmlSheetRefReader;
+     try
+     {
+         // Parsing is synchronous; release the workbook stream as soon as it is done
+         // instead of leaving it open.
+         using (Stream workbookStream = wb.GetInputStream())
+         {
+             xmlReader.Parse(new InputSource(workbookStream));
+         }
+     }
+     catch (SAXException e)
+     {
+         throw new POIXMLException(e);
+     }
+
+     List<XSSFSheetRef> validSheets = new List<XSSFSheetRef>();
+     foreach (XSSFSheetRef xssfSheetRef in xmlSheetRefReader.GetSheetRefs())
+     {
+         //if there's no relationship id, silently skip the sheet
+         if (!String.IsNullOrEmpty(xssfSheetRef.Id))
+         {
+             validSheets.Add(xssfSheetRef);
+         }
+     }
+     return validSheets;
+ }
+
+ /// <summary>
+ /// Gets string representations of relationships
+ /// that are sheet-like. Added to allow subclassing
+ /// by XSSFBReader. This is used to decide what
+ /// relationships to load into the sheetRefs
+ /// </summary>
+ /// <returns>all relationships that are sheet-like</returns>
+ // NOTE(review): this member is static, so a subclass cannot override it as the
+ // summary suggests - confirm whether that is intended.
+ static ISet SheetRelationships => WORKSHEET_RELS;
+
+ ///
+ /// Returns true if the iteration has more elements.
+ ///
+ /// true if the iterator has more elements.
+ //public bool HasNext()
+ //{
+ // return sheetIterator.HasNext();
+ //}
+
+ /// <summary>
+ /// Makes the enumerator's current sheet reference the active one and opens the
+ /// input stream of the part mapped to its relationship id.
+ /// </summary>
+ /// <returns>input stream of the next sheet in the iteration</returns>
+ private Stream Next()
+ {
+     xssfSheetRef = sheetIterator.Current;
+     String relationshipId = xssfSheetRef.Id;
+
+     try
+     {
+         return sheetMap[relationshipId].GetInputStream();
+     }
+     catch (IOException e)
+     {
+         throw new POIXMLException(e);
+     }
+ }
+
+ /// <summary>
+ /// Input stream of the sheet the enumerator is positioned on. Every read of this
+ /// property calls Next(), which updates the current sheet reference and fetches the
+ /// part's input stream again, so read it once per MoveNext() and keep the result.
+ /// </summary>
+ public Stream Current => Next();
+
+ // Non-generic view of Current; also calls Next() on every read.
+ object IEnumerator.Current => Next();
+ /// <summary>
+ /// Advances the underlying sheet-reference enumerator.
+ /// </summary>
+ /// <returns>whatever the underlying enumerator's MoveNext() returns</returns>
+ public bool MoveNext()
+ {
+ return sheetIterator.MoveNext();
+ }
+
+ /// <summary>
+ /// Resets the underlying sheet-reference enumerator. The cached current sheet
+ /// reference is left untouched until Current is read again.
+ /// </summary>
+ public void Reset()
+ {
+ sheetIterator.Reset();
+ }
+
+ /// <summary>
+ /// Disposes the underlying sheet-reference enumerator. Streams already handed out
+ /// through Current are not closed here.
+ /// </summary>
+ public void Dispose()
+ {
+ sheetIterator.Dispose();
+ }
+
+ /// <summary>
+ /// Name of the sheet most recently fetched through Current.
+ /// </summary>
+ /// <returns>name of the current sheet</returns>
+ public String SheetName
+ {
+     get { return xssfSheetRef.Name; }
+ }
+
+ /// <summary>
+ /// Returns the comments associated with this sheet,
+ /// or null if there aren't any. Null is also returned when resolving or reading the
+ /// comments part fails with an InvalidFormatException or IOException.
+ /// </summary>
+ public CommentsTable SheetComments
+ {
+     get
+     {
+         PackagePart currentSheet = SheetPart;
+
+         try
+         {
+             // Do we have a comments relationship? (Only ever one if so)
+             PackageRelationshipCollection commentRels =
+                 currentSheet.GetRelationshipsByType(XSSFRelation.SHEET_COMMENTS.Relation);
+             if (commentRels.Size <= 0)
+             {
+                 return null;
+             }
+
+             PackageRelationship commentRel = commentRels.GetRelationship(0);
+             PackagePartName commentPartName = PackagingUriHelper.CreatePartName(commentRel.TargetUri);
+             PackagePart commentPart = currentSheet.Package.GetPart(commentPartName);
+             return new CommentsTable(commentPart);
+         }
+         catch (InvalidFormatException)
+         {
+             return null;
+         }
+         catch (IOException)
+         {
+             return null;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Returns the shapes associated with this sheet,
+ /// an empty list or null if there is an exception
+ /// </summary>
+ public List Shapes
+ {
+ get
+ {
+ PackagePart sheetPkg = SheetPart;
+ List shapes = new List();
+ // Walk every drawing relationship of the sheet part and collect the shapes of each drawing
+ try
+ {
+ PackageRelationshipCollection drawingsList = sheetPkg.GetRelationshipsByType(XSSFRelation.DRAWINGS.Relation);
+ for(int i = 0; i < drawingsList.Size; i++)
+ {
+ PackageRelationship drawings = drawingsList.GetRelationship(i);
+ PackagePartName drawingsName = PackagingUriHelper.CreatePartName(drawings.TargetUri);
+ PackagePart drawingsPart = sheetPkg.Package.GetPart(drawingsName);
+ if(drawingsPart == null)
+ {
+ //parts can go missing; Excel ignores them silently -- TIKA-2134
+ //LOGGER.log(POILogger.WARN, "Missing Drawing: " + drawingsName + ". Skipping it.");
+ continue;
+ }
+ XSSFDrawing drawing = new XSSFDrawing(drawingsPart);
+ shapes.AddRange(drawing.GetShapes());
+ }
+ }
+ // Any of the failures below discards the shapes gathered so far and reports null
+ catch(XmlException)
+ {
+ return null;
+ }
+ catch(InvalidFormatException)
+ {
+ return null;
+ }
+ catch(IOException)
+ {
+ return null;
+ }
+ return shapes;
+ }
+ }
+
+ /// <summary>
+ /// Package part mapped to the relationship id of the current sheet reference.
+ /// The current sheet reference is only set once Current has been read.
+ /// </summary>
+ public PackagePart SheetPart => sheetMap[xssfSheetRef.Id];
+
+ /// <summary>
+ /// We're read only, so remove isn't supported
+ /// </summary>
+ /// <exception cref="InvalidOperationException">always</exception>
+ public void Remove()
+ {
+ throw new InvalidOperationException("Not supported");
+ }
+
+
+ }
+
+ /// <summary>
+ /// Immutable pair of a sheet's relationship id and its name, as read from the workbook part.
+ /// </summary>
+ public sealed class XSSFSheetRef
+ {
+     //do we need to store sheetId, too?
+
+     /// <summary>
+     /// Stores the given id and name unchanged; no validation is performed.
+     /// </summary>
+     public XSSFSheetRef(String id, String name)
+     {
+         Id = id;
+         Name = name;
+     }
+
+     /// <summary>The id passed to the constructor.</summary>
+     public String Id { get; }
+
+     /// <summary>The name passed to the constructor.</summary>
+     public String Name { get; }
+ }
+
+ //scrapes sheet reference info and order from workbook.xml
+ private class XMLSheetRefReader : DefaultHandler
+ {
+ // Element and attribute local names this reader looks for (compared case-insensitively)
+ private static String SHEET = "sheet";
+ private static String ID = "id";
+ private static String NAME = "name";
+
+ // Sheet references in the order their elements were reported by the parser
+ private List sheetRefs = new List();
+
+ // read a "sheet" element (for example one carrying id "rId6" and name "Sheet6")
+ // and add XSSFSheetRef(id="rId6", name="Sheet6") to sheetRefs
+ public override void StartElement(String uri, String localName, String qName, IAttributes attrs)
+ {
+
+ if (localName.Equals(SHEET, StringComparison.OrdinalIgnoreCase))
+ {
+ String name = null;
+ String id = null;
+ for (int i = 0; i < attrs.Length; i++)
+ {
+ String attrName = attrs.GetLocalName(i);
+ if (attrName.Equals(NAME, StringComparison.OrdinalIgnoreCase))
+ {
+ name = attrs.GetValue(i);
+ }
+ else if (attrName.Equals(ID, StringComparison.OrdinalIgnoreCase))
+ {
+ id = attrs.GetValue(i);
+ }
+ // Stop scanning as soon as both values are known; an element that lacks
+ // either attribute never produces a sheet reference.
+ if (name != null && id != null)
+ {
+ sheetRefs.Add(new XSSFSheetRef(id, name));
+ break;
+ }
+ }
+ }
+ }
+
+ // Returns the backing list itself, not a copy
+ public List GetSheetRefs()
+ {
+ return sheetRefs;
+ }
+ }
+ }
+}
diff --git a/ooxml/XSSF/EventUserModel/XSSFSheetXMLHandler.cs b/ooxml/XSSF/EventUserModel/XSSFSheetXMLHandler.cs
new file mode 100644
index 000000000..71c7a04d9
--- /dev/null
+++ b/ooxml/XSSF/EventUserModel/XSSFSheetXMLHandler.cs
@@ -0,0 +1,611 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace NPOI.XSSF.EventUserModel
+{
+ using static NPOI.XSSF.UserModel.XSSFRelation;
+
+
+ using NPOI.SS.UserModel;
+ using NPOI.SS.Util;
+ using NPOI.Util;
+ using NPOI.XSSF.Model;
+ using NPOI.XSSF.UserModel;
+ using NSAX.Helpers;
+ using NPOI.OpenXmlFormats.Spreadsheet;
+ using NSAX;
+
+ ///
+ /// This class handles the processing of a sheet#.xml
+ /// sheet part of a XSSF .xlsx file, and generates
+ /// row and cell events for it.
+ ///
+ public class XSSFSheetXMLHandler : DefaultHandler
+ {
+ //private static POILogger logger = POILogFactory.GetLogger(XSSFSheetXMLHandler.class);
+
+ /// <summary>
+ /// These are the different kinds of cells we support.
+ /// We keep track of the current one between
+ /// the start and end.
+ /// </summary>
+ enum XSSFDataType
+ {
+ // cell type attribute "b"
+ Boolean,
+ // cell type attribute "e"
+ Error,
+ // cell type attribute "str", or a default (Number) cell in which an "f" element is seen
+ Formula,
+ // cell type attribute "inlineStr"
+ InlineString,
+ // cell type attribute "s"
+ SSTString,
+ // default when no other cell type attribute value matches
+ Number,
+ }
+
+ ///
+ /// Table with the styles used for formatting
+ ///
+ private StylesTable stylesTable;
+
+ ///
+ /// Table with cell comments
+ ///
+ private CommentsTable commentsTable;
+
+ ///
+ /// Read only access to the shared strings table, for looking
+ /// up (most) string cell's contents
+ ///
+ private ReadOnlySharedStringsTable sharedStringsTable;
+
+ ///
+ /// Where our text is going
+ ///
+ private SheetContentsHandler output;
+
+ // Set when V start element is seen
+ private bool vIsOpen;
+ // Set when F start element is seen
+ private bool fIsOpen;
+ // Set when an Inline String "is" is seen
+ private bool isIsOpen;
+ // Set when a header/footer element is seen
+ private bool hfIsOpen;
+
+ // Set when cell start element is seen;
+ // used when cell close element is seen.
+ private XSSFDataType nextDataType;
+
+ // Used to format numeric cell values.
+ private short formatIndex;
+ private String formatString;
+ private DataFormatter formatter;
+ private int rowNum;
+ private int nextRowNum; // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well
+ private String cellRef;
+ private bool formulasNotResults;
+
+ // Gathers characters as they are seen.
+ private StringBuilder value = new StringBuilder();
+ private StringBuilder formula = new StringBuilder();
+ private StringBuilder headerFooter = new StringBuilder();
+
+ private Queue commentCellRefs;
+
+ /// <summary>
+ /// Accepts objects needed while parsing.
+ /// </summary>
+ /// <param name="styles">Table of styles</param>
+ /// <param name="comments">Table with cell comments; may be null, in which case no comment cell references are queued</param>
+ /// <param name="strings">Table of shared strings</param>
+ /// <param name="sheetContentsHandler">Where our text is going</param>
+ /// <param name="dataFormatter">Used to format numeric cell values</param>
+ /// <param name="formulasNotResults">stored for use while parsing formula cells</param>
+ public XSSFSheetXMLHandler(
+     StylesTable styles,
+     CommentsTable comments,
+     ReadOnlySharedStringsTable strings,
+     SheetContentsHandler sheetContentsHandler,
+     DataFormatter dataFormatter,
+     bool formulasNotResults)
+ {
+     // lookup tables
+     stylesTable = styles;
+     commentsTable = comments;
+     sharedStringsTable = strings;
+
+     // output and formatting
+     output = sheetContentsHandler;
+     formatter = dataFormatter;
+     this.formulasNotResults = formulasNotResults;
+
+     // cells are numbers until their type attribute says otherwise
+     nextDataType = XSSFDataType.Number;
+
+     // must run after commentsTable is assigned
+     Init();
+ }
+
+ /// <summary>
+ /// Accepts objects needed while parsing. Delegates to the full constructor with a
+ /// null comments table.
+ /// </summary>
+ /// <param name="styles">Table of styles</param>
+ /// <param name="strings">Table of shared strings</param>
+ public XSSFSheetXMLHandler(
+ StylesTable styles,
+ ReadOnlySharedStringsTable strings,
+ SheetContentsHandler sheetContentsHandler,
+ DataFormatter dataFormatter,
+ bool formulasNotResults)
+ : this(styles, null, strings, sheetContentsHandler, dataFormatter, formulasNotResults)
+ {
+
+ }
+
+ /// <summary>
+ /// Accepts objects needed while parsing. Delegates to the constructor above with a
+ /// newly created DataFormatter.
+ /// </summary>
+ /// <param name="styles">Table of styles</param>
+ /// <param name="strings">Table of shared strings</param>
+ public XSSFSheetXMLHandler(
+ StylesTable styles,
+ ReadOnlySharedStringsTable strings,
+ SheetContentsHandler sheetContentsHandler,
+ bool formulasNotResults)
+ : this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults)
+ {
+
+ }
+
+ // Queues the cell address of every comment in the comments table, in the order the
+ // table lists them. Does nothing when no comments table was supplied.
+ private void Init()
+ {
+     if (commentsTable == null)
+     {
+         return;
+     }
+
+     commentCellRefs = new Queue<CellAddress>();
+     //noinspection deprecation
+     CT_Comment[] allComments = commentsTable.GetCTComments().commentList.GetCommentArray();
+     foreach (CT_Comment entry in allComments)
+     {
+         commentCellRefs.Enqueue(new CellAddress(entry.@ref));
+     }
+ }
+
+ // Tells whether an element with this local name carries cell text: "v" and
+ // "inlineStr" always do, "t" only while an inline string ("is") element is open.
+ private bool IsTextTag(String name)
+ {
+     return "v".Equals(name)
+         || "inlineStr".Equals(name)
+         || ("t".Equals(name) && isIsOpen);
+ }
+ /// <summary>
+ /// Handles the start of an element: opens the matching text buffer, or records
+ /// the row number, or the cell reference, data type and number format for the
+ /// content that follows. Elements whose namespace URI is non-null and differs
+ /// from NS_SPREADSHEETML are ignored.
+ /// </summary>
+ /// <param name="uri">Namespace URI of the element (may be null)</param>
+ /// <param name="localName">Local element name; this is what the handler dispatches on</param>
+ /// <param name="qName">Qualified element name (not used here)</param>
+ /// <param name="attributes">Attributes of the element</param>
+ public override void StartElement(String uri, String localName, String qName,
+ IAttributes attributes)
+ {
+
+
+ if(uri != null && !uri.Equals(NS_SPREADSHEETML))
+ {
+ return;
+ }
+
+ if(IsTextTag(localName))
+ {
+ vIsOpen = true;
+ // Clear contents cache
+ value.Length = 0;
+ }
+ else if("is".Equals(localName))
+ {
+ // Inline string outer tag
+ isIsOpen = true;
+ }
+ else if("f".Equals(localName))
+ {
+ // Clear contents cache
+ formula.Length = 0;
+
+ // Mark us as being a formula if not already
+ if(nextDataType == XSSFDataType.Number)
+ {
+ nextDataType = XSSFDataType.Formula;
+ }
+
+ // Decide where to Get the formula string from
+ String type = attributes.GetValue("t");
+ if(type != null && type.Equals("shared"))
+ {
+ // Is it the one that defines the shared, or uses it?
+ String ref1 = attributes.GetValue("ref");
+ // NOTE(review): si is read but not used anywhere in this method
+ String si = attributes.GetValue("si");
+
+ if(ref1 != null)
+ {
+ // This one defines it
+ // TODO Save it somewhere
+ fIsOpen = true;
+ }
+ else
+ {
+ // This one uses a shared formula
+ // TODO Retrieve the shared formula and tweak it to
+ // match the current cell
+ // fIsOpen is not set on this path, so no formula text is captured
+ if(formulasNotResults)
+ {
+ //logger.log(POILogger.WARN, "shared formulas not yet supported!");
+ }
+ /*else {
+ // It's a shared formula, so we can't Get at the formula string yet
+ // However, they don't care about the formula string, so that's ok!
+ }*/
+ }
+ }
+ else
+ {
+ fIsOpen = true;
+ }
+ }
+ else if("oddHeader".Equals(localName) || "evenHeader".Equals(localName) ||
+ "firstHeader".Equals(localName) || "firstFooter".Equals(localName) ||
+ "oddFooter".Equals(localName) || "evenFooter".Equals(localName))
+ {
+ hfIsOpen = true;
+ // Clear contents cache
+ headerFooter.Length = 0;
+ }
+ else if("row".Equals(localName))
+ {
+ String rowNumStr = attributes.GetValue("r");
+ if(rowNumStr != null)
+ {
+ // The attribute value is decremented before it is reported to the handler
+ rowNum = Int32.Parse(rowNumStr) - 1;
+ }
+ else
+ {
+ // No "r" attribute: continue from the row after the last one that ended
+ rowNum = nextRowNum;
+ }
+ output.StartRow(rowNum);
+ }
+ // c => cell
+ else if("c".Equals(localName))
+ {
+ // Set up defaults.
+ this.nextDataType = XSSFDataType.Number;
+ this.formatIndex = -1;
+ this.formatString = null;
+ cellRef = attributes.GetValue("r");
+ String cellType = attributes.GetValue("t");
+ String cellStyleStr = attributes.GetValue("s");
+ if("b".Equals(cellType))
+ nextDataType = XSSFDataType.Boolean;
+ else if("e".Equals(cellType))
+ nextDataType = XSSFDataType.Error;
+ else if("inlineStr".Equals(cellType))
+ nextDataType = XSSFDataType.InlineString;
+ else if("s".Equals(cellType))
+ nextDataType = XSSFDataType.SSTString;
+ else if("str".Equals(cellType))
+ nextDataType = XSSFDataType.Formula;
+ else
+ {
+ // Number, but almost certainly with a special style or format
+ XSSFCellStyle style = null;
+ if(stylesTable != null)
+ {
+ if(cellStyleStr != null)
+ {
+ int styleIndex = int.Parse(cellStyleStr);
+ style = stylesTable.GetStyleAt(styleIndex);
+ }
+ else if(stylesTable.NumCellStyles > 0)
+ {
+ // No "s" attribute: fall back to the style at index 0
+ style = stylesTable.GetStyleAt(0);
+ }
+ }
+ if(style != null)
+ {
+ this.formatIndex = style.DataFormat;
+ this.formatString = style.GetDataFormatString();
+ // No format string on the style: look the index up among the built-in formats
+ if(this.formatString == null)
+ this.formatString = BuiltinFormats.GetBuiltinFormat(this.formatIndex);
+ }
+ }
+ }
+ }
+ /// <summary>
+ /// Handles the end of an element. For a text element the buffered characters are
+ /// converted according to the pending cell data type and reported through
+ /// output.Cell; the end of a row, of the sheet data, or of a header/footer
+ /// element triggers the corresponding callback. Elements whose namespace URI is
+ /// non-null and differs from NS_SPREADSHEETML are ignored.
+ /// </summary>
+ /// <remarks>
+ /// Numeric text is parsed with the invariant culture via TryParse, so the result
+ /// does not depend on the thread culture, and malformed or out-of-range text falls
+ /// back to the raw string instead of throwing out of the parser callback.
+ /// </remarks>
+ /// <param name="uri">Namespace URI of the element (may be null)</param>
+ /// <param name="localName">Local element name; this is what the handler dispatches on</param>
+ /// <param name="qName">Qualified element name (not used here)</param>
+ public override void EndElement(String uri, String localName, String qName)
+ {
+     if(uri != null && !uri.Equals(NS_SPREADSHEETML))
+     {
+         return;
+     }
+
+     String thisStr = null;
+
+     // v => contents of a cell
+     if(IsTextTag(localName))
+     {
+         vIsOpen = false;
+
+         // Numbers in the sheet XML are written culture-independently, so they
+         // must never be parsed with the current thread culture.
+         System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
+
+         // Process the value contents as required, now we have it all
+         switch(nextDataType)
+         {
+             case XSSFDataType.Boolean:
+                 if(value.Length == 0)
+                 {
+                     // Nothing to inspect: report empty text rather than failing on value[0]
+                     thisStr = "";
+                 }
+                 else
+                 {
+                     thisStr = value[0] == '0' ? "FALSE" : "TRUE";
+                 }
+                 break;
+
+             case XSSFDataType.Error:
+                 thisStr = "ERROR:" + value;
+                 break;
+
+             case XSSFDataType.Formula:
+                 if(formulasNotResults)
+                 {
+                     thisStr = formula.ToString();
+                 }
+                 else
+                 {
+                     String fv = value.ToString();
+                     double formulaResult;
+                     if(this.formatString != null
+                         && double.TryParse(fv, System.Globalization.NumberStyles.Float, invariant, out formulaResult))
+                     {
+                         // Numeric result with a format: render it like a number cell
+                         thisStr = formatter.FormatRawCellContents(formulaResult, this.formatIndex, this.formatString);
+                     }
+                     else
+                     {
+                         // No formatting applied, or the formula produced a string result
+                         thisStr = fv;
+                     }
+                 }
+                 break;
+
+             case XSSFDataType.InlineString:
+                 // TODO: Can these ever have formatting on them?
+                 XSSFRichTextString rtsi = new XSSFRichTextString(value.ToString());
+                 thisStr = rtsi.ToString();
+                 break;
+
+             case XSSFDataType.SSTString:
+                 String sstIndex = value.ToString();
+                 int idx;
+                 if(int.TryParse(sstIndex, System.Globalization.NumberStyles.Integer, invariant, out idx))
+                 {
+                     XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.GetEntryAt(idx));
+                     thisStr = rtss.ToString();
+                 }
+                 else
+                 {
+                     // Unparsable index: the cell is reported with a null value
+                     //logger.log(POILogger.ERROR, "Failed to parse SST index '" + sstIndex, ex);
+                 }
+                 break;
+
+             case XSSFDataType.Number:
+                 String n = value.ToString();
+                 double number;
+                 if(this.formatString != null && n.Length > 0
+                     && double.TryParse(n, System.Globalization.NumberStyles.Float, invariant, out number))
+                 {
+                     thisStr = formatter.FormatRawCellContents(number, this.formatIndex, this.formatString);
+                 }
+                 else
+                 {
+                     // No format, empty value, or text that is not a number: pass it through
+                     thisStr = n;
+                 }
+                 break;
+
+             default:
+                 thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
+                 break;
+         }
+
+         // Do we have a comment for this cell?
+         CheckForEmptyCellComments(EmptyCellCommentsCheckType.Cell);
+         XSSFComment comment = commentsTable != null ? commentsTable.FindCellComment(new CellAddress(cellRef)) : null;
+
+         // Output
+         output.Cell(cellRef, thisStr, comment);
+     }
+     else if("f".Equals(localName))
+     {
+         fIsOpen = false;
+     }
+     else if("is".Equals(localName))
+     {
+         isIsOpen = false;
+     }
+     else if("row".Equals(localName))
+     {
+         // Handle any "missing" cells which had comments attached
+         CheckForEmptyCellComments(EmptyCellCommentsCheckType.EndOfRow);
+
+         // Finish up the row
+         output.EndRow(rowNum);
+
+         // some sheets do not have rowNum Set in the XML, Excel can read them so we should try to read them as well
+         nextRowNum = rowNum + 1;
+     }
+     else if("sheetData".Equals(localName))
+     {
+         // Handle any "missing" cells which had comments attached
+         CheckForEmptyCellComments(EmptyCellCommentsCheckType.EndOfSheetData);
+     }
+     else if("oddHeader".Equals(localName) || "evenHeader".Equals(localName) ||
+         "firstHeader".Equals(localName))
+     {
+         hfIsOpen = false;
+         output.HeaderFooter(headerFooter.ToString(), true, localName);
+     }
+     else if("oddFooter".Equals(localName) || "evenFooter".Equals(localName) ||
+         "firstFooter".Equals(localName))
+     {
+         hfIsOpen = false;
+         output.HeaderFooter(headerFooter.ToString(), false, localName);
+     }
+ }
+
+ /// <summary>
+ /// Receives character data and copies it into the buffer of every element
+ /// that is currently open: cell value, formula, and header/footer.
+ /// Originally only "v" was captured; inline strings were added later.
+ /// </summary>
+ public override void Characters(char[] ch, int start, int length)
+ {
+     // The flags are checked independently, so the same characters can
+     // end up in more than one buffer.
+     if(vIsOpen) value.Append(ch, start, length);
+     if(fIsOpen) formula.Append(ch, start, length);
+     if(hfIsOpen) headerFooter.Append(ch, start, length);
+ }
+
+ /// <summary>
+ /// Do a check for, and output, comments in otherwise empty cells.
+ /// Works on the queue of commented cell addresses: addresses that are
+ /// reported here, or that belong to the cell currently being output, are removed from it.
+ /// </summary>
+ /// <param name="type">The point of the parse from which the check is made</param>
+ /// <exception cref="InvalidOperationException">
+ /// Thrown when no cell reference has been recorded and the check is made for a cell rather than at the end of a row.
+ /// </exception>
+ private void CheckForEmptyCellComments(EmptyCellCommentsCheckType type)
+ {
+ if(commentCellRefs != null && commentCellRefs.Count>0)
+ {
+ // If we've reached the end of the sheet data, output any
+ // comments we haven't yet already handled
+ if(type == EmptyCellCommentsCheckType.EndOfSheetData)
+ {
+ while(commentCellRefs.Count>0)
+ {
+ OutputEmptyCellComment(commentCellRefs.Dequeue());
+ }
+ return;
+ }
+
+ // At the end of a row, handle any comments for "missing" rows before us
+ if(this.cellRef == null)
+ {
+ if(type == EmptyCellCommentsCheckType.EndOfRow)
+ {
+ // Drain queued comments for as long as the head of the queue is on the current row
+ while(commentCellRefs.Count>0)
+ {
+ if(commentCellRefs.Peek().Row == rowNum)
+ {
+ OutputEmptyCellComment(commentCellRefs.Dequeue());
+ }
+ else
+ {
+ return;
+ }
+ }
+ return;
+ }
+ else
+ {
+ throw new InvalidOperationException("Cell ref should be null only if there are only empty cells in the row; rowNum: " + rowNum);
+ }
+ }
+
+ CellAddress nextCommentCellRef;
+ do
+ {
+ // Local cellRef is the parsed form of the field this.cellRef and shadows it inside the loop
+ CellAddress cellRef = new CellAddress(this.cellRef);
+ CellAddress peekCellRef = commentCellRefs.Peek();
+ if(type == EmptyCellCommentsCheckType.Cell && cellRef.Equals(peekCellRef))
+ {
+ // remove the comment cell ref from the list if we're about to handle it alongside the cell content
+ commentCellRefs.Dequeue();
+ return;
+ }
+ else
+ {
+ // fill in any gaps if there are empty cells with comment mixed in with non-empty cells
+ int comparison = peekCellRef.CompareTo(cellRef);
+ // End of row: the queued comment sorts after the last recorded cell and is not on a later row
+ if(comparison > 0 && type == EmptyCellCommentsCheckType.EndOfRow && peekCellRef.Row <= rowNum)
+ {
+ nextCommentCellRef = commentCellRefs.Dequeue();
+ OutputEmptyCellComment(nextCommentCellRef);
+ }
+ // Cell: the queued comment sorts before the current cell and is not on a later row
+ else if(comparison < 0 && type == EmptyCellCommentsCheckType.Cell && peekCellRef.Row <= rowNum)
+ {
+ nextCommentCellRef = commentCellRefs.Dequeue();
+ OutputEmptyCellComment(nextCommentCellRef);
+ }
+ else
+ {
+ // Nothing to output for the head of the queue: this ends the loop
+ nextCommentCellRef = null;
+ }
+ }
+ } while(nextCommentCellRef != null && commentCellRefs.Count>0);
+ }
+ }
+
+
+ /// <summary>
+ /// Reports a cell that has a comment but no value: the comment found for the
+ /// address is passed to the handler together with a null value.
+ /// </summary>
+ private void OutputEmptyCellComment(CellAddress cellRef)
+ {
+     XSSFComment attached = commentsTable.FindCellComment(cellRef);
+     String address = cellRef.FormatAsString();
+     output.Cell(address, null, attached);
+ }
+
+ /// <summary>
+ /// Tells CheckForEmptyCellComments from which point of the parse it is called.
+ /// </summary>
+ private enum EmptyCellCommentsCheckType
+ {
+ // Passed just before a cell value is reported
+ Cell,
+ // Passed when a "row" element ends
+ EndOfRow,
+ // Passed when the "sheetData" element ends
+ EndOfSheetData
+ }
+
+ /// <summary>
+ /// You need to implement this to handle the results
+ /// of the sheet parsing.
+ /// </summary>
+ public interface SheetContentsHandler
+ {
+ /// <summary>
+ /// A row with the (zero based) row number has started
+ /// </summary>
+ /// <param name="rowNum">Zero based row number</param>
+ public void StartRow(int rowNum);
+ /// <summary>
+ /// A row with the (zero based) row number has ended
+ /// </summary>
+ /// <param name="rowNum">Zero based row number</param>
+ public void EndRow(int rowNum);
+ /// <summary>
+ /// A cell, with the given formatted value (may be null),
+ /// and possibly a comment (may be null), was encountered
+ /// </summary>
+ /// <param name="cellReference">Reference of the cell</param>
+ /// <param name="formattedValue">Formatted value of the cell, or null</param>
+ /// <param name="comment">Comment attached to the cell, or null</param>
+ public void Cell(String cellReference, String formattedValue, XSSFComment comment);
+ /// <summary>
+ /// A header or footer has been encountered
+ /// </summary>
+ /// <param name="text">Text of the header or footer</param>
+ /// <param name="IsHeader">True for a header, false for a footer</param>
+ /// <param name="tagName">Name of the element the text came from</param>
+ public void HeaderFooter(String text, bool IsHeader, String tagName);
+ }
+ }
+}
+
diff --git a/ooxml/XSSF/Extractor/XSSFEventBasedExcelExtractor.cs b/ooxml/XSSF/Extractor/XSSFEventBasedExcelExtractor.cs
index e7009b2e5..c0db763ab 100644
--- a/ooxml/XSSF/Extractor/XSSFEventBasedExcelExtractor.cs
+++ b/ooxml/XSSF/Extractor/XSSFEventBasedExcelExtractor.cs
@@ -1,7 +1,7 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
- this work for Additional information regarding copyright ownership.
+ this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
@@ -14,110 +14,201 @@ the License. You may obtain a copy of the License at
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
-using NPOI.OpenXml4Net.OPC;
+
using System;
-using NPOI.SS.UserModel;
-using NPOI.XSSF.Model;
+using System.Collections;
+using System.Collections.Generic;
using System.IO;
using System.Text;
+
namespace NPOI.XSSF.Extractor
{
+ using NPOI;
+ using NPOI.OpenXml4Net.Exceptions;
+ using NPOI.OpenXml4Net.OPC;
+ using NPOI.SS.UserModel;
+ using NPOI.SS.Extractor;
+ using NPOI.Util;
+ using NPOI.XSSF.EventUserModel;
+ using NPOI.XSSF.Model;
+ using NPOI.XSSF.UserModel;
+ using System.Globalization;
+ using NSAX;
+ using NSAX.AElfred;
+ using static NPOI.XSSF.EventUserModel.XSSFSheetXMLHandler;
+ using NPOI.OpenXml4Net;
- /**
- * Implementation of a text extractor from OOXML Excel
- * files that uses SAX event based parsing.
- */
- public class XSSFEventBasedExcelExtractor : POIXMLTextExtractor
+ ///
+ /// Implementation of a text extractor from OOXML Excel
+ /// files that uses SAX event based parsing.
+ ///
+ public class XSSFEventBasedExcelExtractor : POIXMLTextExtractor, IExcelExtractor
{
+
+ //private static POILogger LOGGER = POILogFactory.GetLogger(XSSFEventBasedExcelExtractor.class);
+
private OPCPackage container;
private POIXMLProperties properties;
- private Locale locale;
+ private CultureInfo locale;
+ private bool includeTextBoxes = true;
private bool includeSheetNames = true;
+ private bool includeCellComments = false;
+ private bool includeHeadersFooters = true;
private bool formulasNotResults = false;
+ private bool concatenatePhoneticRuns = true;
public XSSFEventBasedExcelExtractor(String path)
: this(OPCPackage.Open(path))
{
-
}
- public XSSFEventBasedExcelExtractor(OPCPackage Container)
+ public XSSFEventBasedExcelExtractor(OPCPackage container)
: base(null)
{
- this.container = Container;
- properties = new POIXMLProperties(Container);
+ this.container = container;
+
+ properties = new POIXMLProperties(container);
+ }
+
+ /// <summary>
+ /// Prints the text extracted from the workbook named by the first argument,
+ /// or a short usage message when no argument is given.
+ /// </summary>
+ /// <param name="args">Command-line arguments; the first one is the file path</param>
+ public static void main(String[] args)
+ {
+     if(args.Length < 1)
+     {
+         Console.WriteLine("Use:");
+         Console.WriteLine(" XSSFEventBasedExcelExtractor ");
+         return;
+     }
+
+     var extractor = new XSSFEventBasedExcelExtractor(args[0]);
+     try
+     {
+         Console.WriteLine(extractor.Text);
+     }
+     finally
+     {
+         // Release the opened package even when extraction throws
+         extractor.Close();
+     }
+ }
- /**
- * Should sheet names be included? Default is true
- */
- public void SetIncludeSheetNames(bool includeSheetNames)
+ ///
+ /// Get or Set should sheet names be included? Default is true
+ ///
+ public bool IncludeSheetNames
{
- this.includeSheetNames = includeSheetNames;
+ get
+ {
+ return includeSheetNames;
+ }
+ set
+ {
+ includeSheetNames = value;
+ }
}
- /**
- * Should we return the formula itself, and not
- * the result it produces? Default is false
- */
- public void SetFormulasNotResults(bool formulasNotResults)
+
+
+ ///
+ /// Should we return the formula itself, and not
+ /// the result it produces? Default is false
+ ///
+ public bool FormulasNotResults
{
- this.formulasNotResults = formulasNotResults;
+ get { return formulasNotResults; }
+ set { formulasNotResults = value; }
}
- public void SetLocale(Locale locale)
+ ///
+ /// Should headers and footers be included? Default is true
+ ///
+ public bool IncludeHeadersFooters
{
- this.locale = locale;
+ get { return includeHeadersFooters; }
+ set { includeHeadersFooters = value; }
}
- /**
- * Returns the opened OPCPackage Container.
- */
- public OPCPackage GetPackage()
+ ///
+ /// Should text from textboxes be included? Default is true
+ ///
+ public bool IncludeTextBoxes
{
- return container;
+ get { return includeTextBoxes; }
+ set { includeTextBoxes = value; }
+ }
+
+ /// <summary>
+ /// Gets or sets whether cell comments should be included. Default is false
+ /// </summary>
+ public bool IncludeCellComments
+ {
+     get => includeCellComments;
+     set => includeCellComments = value;
}
- /**
- * Returns the core document properties
- */
+ /// <summary>
+ /// Tab-per-empty-cell switch. Default is true.
+ /// NOTE(review): nothing in the visible part of this class reads this value;
+ /// confirm whether it is meant to affect the event-based output.
+ /// </summary>
+ public bool AddTabEachEmptyCell { get; set; } = true;
- public NPOI.POIXMLProperties.CoreProperties GetCoreProperties()
+ /// <summary>
+ /// Concatenate text from &lt;rPh&gt; text elements in SharedStringsTable
+ /// Default is true;
+ /// </summary>
+ /// <param name="concatenatePhoneticRuns">Value stored and later passed to the ReadOnlySharedStringsTable built by Text</param>
+ public void SetConcatenatePhoneticRuns(bool concatenatePhoneticRuns)
{
- return properties.GetCoreProperties();
+ this.concatenatePhoneticRuns = concatenatePhoneticRuns;
}
- /**
- * Returns the extended document properties
- */
- public NPOI.POIXMLProperties.ExtendedProperties GetExtendedProperties()
+ /// CultureInfo
+ ///
+ public CultureInfo Locale
+ {
+ get { return locale; }
+ set { locale = value; }
+ }
+ ///
+ /// Returns the opened OPCPackage container.
+ ///
+ public OPCPackage GetPackage()
{
- return properties.GetExtendedProperties();
+ return container;
}
- /**
- * Returns the custom document properties
- */
- public NPOI.POIXMLProperties.CustomProperties GetCustomProperties()
+ ///
+ /// Returns the core document properties
+ ///
+ public override CoreProperties GetCoreProperties()
+ {
+ return properties.CoreProperties;
+ }
+ ///
+ /// Returns the extended document properties
+ ///
+ public override ExtendedProperties GetExtendedProperties()
+ {
+ return properties.ExtendedProperties;
+ }
+ ///
+ /// Returns the custom document properties
+ ///
+ public override CustomProperties GetCustomProperties()
{
- return properties.GetCustomProperties();
+ return properties.CustomProperties;
}
- /**
- * Processes the given sheet
- */
+
+
+ ///
+ /// Processes the given sheet
+ ///
public void ProcessSheet(
SheetContentsHandler sheetContentsExtractor,
StylesTable styles,
+ CommentsTable comments,
ReadOnlySharedStringsTable strings,
- InputStream sheetInputStream)
+ Stream sheetInputStream)
+
{
+
DataFormatter formatter;
- if (locale == null)
+ if(locale == null)
{
formatter = new DataFormatter();
}
@@ -127,94 +218,138 @@ public void ProcessSheet(
}
InputSource sheetSource = new InputSource(sheetInputStream);
- SAXParserFactory saxFactory = SAXParserFactory.newInstance();
try
{
- SAXParser saxParser = saxFactory.newSAXParser();
- XMLReader sheetParser = saxParser.GetXMLReader();
- ContentHandler handler = new XSSFSheetXMLHandler(
- styles, strings, sheetContentsExtractor, formatter, formulasNotResults);
- sheetParser.SetContentHandler(handler);
+ SAXDriver sheetParser = new SAXDriver();
+ IContentHandler handler = new XSSFSheetXMLHandler(
+ styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults);
+ sheetParser.ContentHandler = (handler);
sheetParser.Parse(sheetSource);
}
- catch (ParserConfigurationException e)
+ catch(SAXException e)
{
- throw new RuntimeException("SAX Parser appears to be broken - " + e.GetMessage());
+ throw new RuntimeException("SAX parser appears to be broken - " + e.Message);
}
}
- /**
- * Processes the file and returns the text
- */
- public String GetText()
+ /// <summary>
+ /// Processes the file and returns the text
+ /// </summary>
+ /// <remarks>
+ /// For each sheet the output is: the sheet name (optional), headers (optional),
+ /// cell text, text box text (optional) and footers (optional).
+ /// Returns null when an IOException, SAXException or OpenXml4NetException occurs.
+ /// </remarks>
+ public override String Text
{
- try
+ get
{
- ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container);
- XSSFReader xssfReader = new XSSFReader(container);
- StylesTable styles = xssfReader.GetStylesTable();
- XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator)xssfReader.GetSheetsData();
+ try
+ {
+ ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
+ XSSFReader xssfReader = new XSSFReader(container);
+ StylesTable styles = xssfReader.StylesTable;
+ XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.GetSheetsData();
- StringBuilder text = new StringBuilder();
- SheetTextExtractor sheetExtractor = new SheetTextExtractor(text);
+ StringBuilder text = new StringBuilder();
+ SheetTextExtractor sheetExtractor = new SheetTextExtractor(this);
- while (iter.HasNext())
- {
- InputStream stream = iter.next();
- if (includeSheetNames)
+ while(iter.MoveNext())
{
- text.Append(iter.GetSheetName());
- text.Append('\n');
+ Stream stream = iter.Current;
+ try
+ {
+ if(includeSheetNames)
+ {
+ text.Append(iter.SheetName);
+ text.Append('\n');
+ }
+ // Passing null for the comments makes the handler skip comment lookups
+ CommentsTable comments = includeCellComments ? iter.SheetComments : null;
+ ProcessSheet(sheetExtractor, styles, comments, strings, stream);
+ if(includeHeadersFooters)
+ {
+ sheetExtractor.AppendHeaderText(text);
+ }
+ sheetExtractor.AppendCellText(text);
+ if(includeTextBoxes)
+ {
+ ProcessShapes(iter.Shapes, text);
+ }
+ if(includeHeadersFooters)
+ {
+ sheetExtractor.AppendFooterText(text);
+ }
+ sheetExtractor.Reset();
+ }
+ finally
+ {
+ // Close the sheet stream even when processing the sheet throws
+ stream.Close();
+ }
}
- ProcessSheet(sheetExtractor, styles, strings, stream);
- stream.Close();
- }
- return text.ToString();
+ return text.ToString();
+ }
+ catch(IOException)
+ {
+ //LOGGER.log(POILogger.WARN, e);
+ return null;
+ }
+ catch(SAXException)
+ {
+ //LOGGER.log(POILogger.WARN, se);
+ return null;
+ }
+ catch(OpenXml4NetException)
+ {
+ //LOGGER.log(POILogger.WARN, o4je);
+ return null;
+ }
}
- catch (IOException e)
+
+ }
+
+ static void ProcessShapes(List shapes, StringBuilder text)
+ {
+ if(shapes == null)
{
- System.err.println(e);
- return null;
+ return;
}
- catch (OpenXML4NetException o4je)
+ foreach(XSSFShape shape in shapes)
{
- System.err.println(o4je);
- return null;
+ if(shape is XSSFSimpleShape)
+ {
+ String sText = ((XSSFSimpleShape)shape).Text;
+ if(sText != null && sText.Length > 0)
+ {
+ text.Append(sText).Append('\n');
+ }
+ }
}
}
- public void Close()
+ public override void Close()
{
- if (container != null)
+
+ if(container != null)
{
container.Close();
container = null;
}
- base.close();
+ base.Close();
}
+
protected class SheetTextExtractor : SheetContentsHandler
{
- private StringBuilder output;
- private bool firstCellOfRow = true;
-
- protected SheetTextExtractor(StringBuilder output)
+ private StringBuilder output;
+ private bool firstCellOfRow;
+ private Dictionary headerFooterMap;
+ private XSSFEventBasedExcelExtractor eb;
+ public SheetTextExtractor(XSSFEventBasedExcelExtractor eb)
{
- this.output = output;
+ this.eb = eb;
+ this.output = new StringBuilder();
+ this.firstCellOfRow = true;
+ this.headerFooterMap = eb.IncludeHeadersFooters ? new Dictionary() : null;
}
-
- public void startRow(int rowNum)
+ public void StartRow(int rowNum)
{
firstCellOfRow = true;
}
-
- public void endRow()
+ public void EndRow(int rowNum)
{
output.Append('\n');
}
-
- public void cell(String cellRef, String formattedValue)
+ public void Cell(String cellRef, String formattedValue, XSSFComment comment)
{
- if (firstCellOfRow)
+ if(firstCellOfRow)
{
firstCellOfRow = false;
}
@@ -222,12 +357,118 @@ public void cell(String cellRef, String formattedValue)
{
output.Append('\t');
}
- output.Append(formattedValue);
+ if(formattedValue != null)
+ {
+ eb.CheckMaxTextSize(output, formattedValue);
+ output.Append(formattedValue);
+ }
+ if(eb.IncludeCellComments && comment != null)
+ {
+ String commentText = comment.String.String.Replace('\n', ' ');
+ output.Append(formattedValue != null ? " Comment by " : "Comment by ");
+ eb.CheckMaxTextSize(output, commentText);
+ if(commentText.StartsWith(comment.Author + ": "))
+ {
+ output.Append(commentText);
+ }
+ else
+ {
+ output.Append(comment.Author).Append(": ").Append(commentText);
+ }
+ }
+ }
+ /// <summary>
+ /// Remembers the header or footer text under its element name.
+ /// Ignored when no header/footer map was created.
+ /// </summary>
+ public void HeaderFooter(String text, bool IsHeader, String tagName)
+ {
+     if(headerFooterMap == null)
+     {
+         return;
+     }
+
+     headerFooterMap[tagName] = text;
+ }
- public void headerFooter(String text, bool IsHeader, String tagName)
+ /// <summary>
+ /// Append the text for the named header or footer if found.
+ /// Nothing is appended when no header/footer map exists, when the name
+ /// has no entry, or when its text is null or empty.
+ /// </summary>
+ /// <param name="buffer">Destination for the text</param>
+ /// <param name="name">Element name the text was stored under</param>
+ private void AppendHeaderFooterText(StringBuilder buffer, String name)
{
- // We don't include headers in the output yet, so ignore
+ // The map is only created when headers/footers were enabled at construction;
+ // guard like HeaderFooter and Reset do
+ if(headerFooterMap == null)
+ {
+ return;
+ }
+ // TryGetValue leaves text null when the name has no entry
+ headerFooterMap.TryGetValue(name, out string text);
+ if(!string.IsNullOrEmpty(text))
+ {
+ // this is a naive way of handling the left, center, and right
+ // header and footer delimiters, but it seems to be as good as
+ // the method used by XSSFExcelExtractor
+ text = HandleHeaderFooterDelimiter(text, "&L");
+ text = HandleHeaderFooterDelimiter(text, "&C");
+ text = HandleHeaderFooterDelimiter(text, "&R");
+ buffer.Append(text).Append('\n');
+ }
+ }
+ /// <summary>
+ /// Remove the delimiter if it is found at the beginning of the text,
+ /// or replace its first occurrence with a tab if it is in the middle.
+ /// </summary>
+ /// <param name="text">Header or footer text</param>
+ /// <param name="delimiter">Section delimiter to look for, e.g. "&amp;L"</param>
+ /// <returns>The adjusted text, or the text unchanged when the delimiter is absent</returns>
+ private static String HandleHeaderFooterDelimiter(String text, String delimiter)
+ {
+     // Ordinal search: the delimiters are format codes, not linguistic text,
+     // so the match must not depend on the current culture
+     int index = text.IndexOf(delimiter, StringComparison.Ordinal);
+     if(index == 0)
+     {
+         return text.Substring(delimiter.Length);
+     }
+     if(index > 0)
+     {
+         return text.Substring(0, index) + "\t" + text.Substring(index + delimiter.Length);
+     }
+     return text;
+ }
+
+
+ /// <summary>
+ /// Appends the first, odd and even header text, in that order
+ /// (the same order XSSFExcelExtractor appends them in).
+ /// </summary>
+ /// <param name="buffer">Destination for the header text</param>
+ public void AppendHeaderText(StringBuilder buffer)
+ {
+     foreach(String tag in new[] { "firstHeader", "oddHeader", "evenHeader" })
+     {
+         AppendHeaderFooterText(buffer, tag);
+     }
+ }
+
+ /// <summary>
+ /// Appends the first, odd and even footer text, in that order
+ /// (the same order XSSFExcelExtractor appends them in).
+ /// </summary>
+ /// <param name="buffer">Destination for the footer text</param>
+ public void AppendFooterText(StringBuilder buffer)
+ {
+     foreach(String tag in new[] { "firstFooter", "oddFooter", "evenFooter" })
+     {
+         AppendHeaderFooterText(buffer, tag);
+     }
+ }
+
+ /// <summary>
+ /// Appends the cell text collected for the current sheet to the buffer,
+ /// after the owning extractor has checked it against its maximum text size.
+ /// </summary>
+ public void AppendCellText(StringBuilder buffer)
+ {
+     String collected = output.ToString();
+     eb.CheckMaxTextSize(buffer, collected);
+     buffer.Append(output);
+ }
+
+ /// <summary>
+ /// Clears the collected cell text and header/footer entries so the
+ /// instance can be used for the next sheet.
+ /// </summary>
+ public void Reset()
+ {
+     firstCellOfRow = true;
+     output.Length = 0;
+     headerFooterMap?.Clear();
+ }
}
}
diff --git a/ooxml/XSSF/Extractor/XSSFExcelExtractor.cs b/ooxml/XSSF/Extractor/XSSFExcelExtractor.cs
index a33762061..cd626ea8d 100644
--- a/ooxml/XSSF/Extractor/XSSFExcelExtractor.cs
+++ b/ooxml/XSSF/Extractor/XSSFExcelExtractor.cs
@@ -21,6 +21,7 @@ limitations under the License.
using NPOI.SS.UserModel;
using System.Globalization;
using System.Collections.Generic;
+using NPOI.Util;
namespace NPOI.XSSF.Extractor
{
@@ -30,10 +31,10 @@ namespace NPOI.XSSF.Extractor
public class XSSFExcelExtractor : POIXMLTextExtractor, NPOI.SS.Extractor.IExcelExtractor
{
public static XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
- XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK,
- XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK,
- XSSFRelation.MACROS_WORKBOOK
- };
+ XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK,
+ XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK,
+ XSSFRelation.MACROS_WORKBOOK
+ };
private readonly XSSFWorkbook workbook;
private readonly DataFormatter dataFormatter;
@@ -60,7 +61,7 @@ public XSSFExcelExtractor(XSSFWorkbook workbook)
///
/// Should header and footer be included? Default is true
///
- public bool IncludeHeaderFooter
+ public bool IncludeHeadersFooters
{
get
{
@@ -129,9 +130,12 @@ public bool IncludeTextBoxes
includeTextBoxes = value;
}
}
+ public bool AddTabEachEmptyCell { get; set; } = true;
/**
* Should sheet names be included? Default is true
*/
+ [Obsolete("use property IncludeSheetNames")]
+ [Removal(Version = "4.0")]
public void SetIncludeSheetNames(bool includeSheetNames)
{
this.includeSheetNames = includeSheetNames;
@@ -140,6 +144,8 @@ public void SetIncludeSheetNames(bool includeSheetNames)
* Should we return the formula itself, and not
* the result it produces? Default is false
*/
+ [Obsolete("use property FormulasNotResults")]
+ [Removal(Version = "4.0")]
public void SetFormulasNotResults(bool formulasNotResults)
{
this.formulasNotResults = formulasNotResults;
@@ -147,6 +153,8 @@ public void SetFormulasNotResults(bool formulasNotResults)
/**
* Should cell comments be included? Default is false
*/
+ [Obsolete("use property IncludeCellComments")]
+ [Removal(Version = "4.0")]
public void SetIncludeCellComments(bool includeCellComments)
{
this.includeCellComments = includeCellComments;
@@ -154,6 +162,8 @@ public void SetIncludeCellComments(bool includeCellComments)
/**
* Should headers and footers be included? Default is true
*/
+ [Obsolete("use property IncludeHeadersFooters")]
+ [Removal(Version = "4.0")]
public void SetIncludeHeadersFooters(bool includeHeadersFooters)
{
this.includeHeadersFooters = includeHeadersFooters;
@@ -163,6 +173,8 @@ public void SetIncludeHeadersFooters(bool includeHeadersFooters)
* Should text within textboxes be included? Default is true
* @param includeTextBoxes
*/
+ [Obsolete("use property IncludeTextBoxes")]
+ [Removal(Version = "4.0")]
public void SetIncludeTextBoxes(bool includeTextBoxes)
{
this.includeTextBoxes = includeTextBoxes;
@@ -223,13 +235,16 @@ public override string Text
for (int j = 0; j < row.LastCellNum; j++)
{
// Add a tab delimiter for each empty cell.
- if (!firsttime)
+ if(AddTabEachEmptyCell)
{
- text.Append("\t");
- }
- else
- {
- firsttime = false;
+ if(!firsttime)
+ {
+ text.Append("\t");
+ }
+ else
+ {
+ firsttime = false;
+ }
}
ICell cell = row.GetCell(j);
diff --git a/ooxml/XSSF/Model/CommentsTable.cs b/ooxml/XSSF/Model/CommentsTable.cs
index 7e545140a..763d359de 100644
--- a/ooxml/XSSF/Model/CommentsTable.cs
+++ b/ooxml/XSSF/Model/CommentsTable.cs
@@ -116,15 +116,9 @@ public void ReferenceUpdated(CellAddress oldReference, CT_Comment comment)
}
- public int GetNumberOfComments()
- {
- return comments.commentList.SizeOfCommentArray();
- }
+ /// <summary>
+ /// Number of entries in the underlying comment list.
+ /// </summary>
+ public int NumberOfComments => comments.commentList.SizeOfCommentArray();
- public int GetNumberOfAuthors()
- {
- return comments.authors.SizeOfAuthorArray();
- }
+ /// <summary>
+ /// Number of entries in the underlying author list.
+ /// </summary>
+ public int NumberOfAuthors => comments.authors.SizeOfAuthorArray();
public String GetAuthor(long authorId)
{
diff --git a/ooxml/XSSF/Model/StylesTable.cs b/ooxml/XSSF/Model/StylesTable.cs
index 8c8238739..72f123ea8 100644
--- a/ooxml/XSSF/Model/StylesTable.cs
+++ b/ooxml/XSSF/Model/StylesTable.cs
@@ -130,25 +130,26 @@ public void SetWorkbook(XSSFWorkbook wb)
{
this.workbook = wb;
}
- public ThemesTable GetTheme()
+ /// <summary>
+ /// Gets or sets the themes table. Setting it also hands the indexed colour map
+ /// to the new theme (when it is not null) and passes the theme to the fonts and
+ /// borders that already exist.
+ /// </summary>
+ public ThemesTable Theme
{
- return theme;
- }
-
- public void SetTheme(ThemesTable theme)
- {
- this.theme = theme;
-
- if (theme != null) theme.SetColorMap(indexedColors);
- // Pass the themes table along to things which need to
- // know about it, but have already been Created by now
- foreach (XSSFFont font in fonts)
+ get
{
- font.SetThemesTable(theme);
+ return theme;
}
- foreach (XSSFCellBorder border in borders)
+ set
{
- border.SetThemesTable(theme);
+ this.theme = value;
+
+ // Same call the former SetTheme made: give the theme the indexed colour map
+ if(theme != null) theme.SetColorMap(indexedColors);
+
+ // Pass the themes table along to things which need to
+ // know about it, but have already been Created by now
+ foreach(XSSFFont font in fonts)
+ {
+ font.SetThemesTable(theme);
+ }
+ foreach(XSSFCellBorder border in borders)
+ {
+ border.SetThemesTable(theme);
+ }
}
}
@@ -199,7 +200,7 @@ public void EnsureThemesTable()
{
if (theme != null) return;
- SetTheme((ThemesTable)workbook.CreateRelationship(XSSFRelation.THEME, XSSFFactory.GetInstance()));
+ // Assign through the property, as the former SetTheme call did, so that the
+ // existing fonts and borders are handed the newly created theme
+ Theme = (ThemesTable)workbook.CreateRelationship(XSSFRelation.THEME, XSSFFactory.GetInstance());
}
/**
* Read this shared styles table from an XML file.
@@ -551,10 +552,7 @@ public ReadOnlyCollection GetFills()
return fills.AsReadOnly();
}
- public ReadOnlyCollection GetFonts()
- {
- return fonts.AsReadOnly();
- }
+ /// <summary>
+ /// Read-only view of the font list; a new wrapper is created on every access.
+ /// </summary>
+ public ReadOnlyCollection Fonts => fonts.AsReadOnly();
public IDictionary GetNumberFormats()
{
diff --git a/ooxml/XSSF/UserModel/XSSFFont.cs b/ooxml/XSSF/UserModel/XSSFFont.cs
index 720ce0c95..5e9eb65df 100644
--- a/ooxml/XSSF/UserModel/XSSFFont.cs
+++ b/ooxml/XSSF/UserModel/XSSFFont.cs
@@ -550,7 +550,7 @@ public override String ToString()
// */
public long RegisterTo(StylesTable styles)
{
- this._themes = styles.GetTheme();
+ this._themes = styles.Theme;
short idx = (short)styles.PutFont(this, true);
this._index = idx;
return idx;
diff --git a/ooxml/XSSF/UserModel/XSSFRichTextString.cs b/ooxml/XSSF/UserModel/XSSFRichTextString.cs
index dd5f323c6..371de8dfe 100644
--- a/ooxml/XSSF/UserModel/XSSFRichTextString.cs
+++ b/ooxml/XSSF/UserModel/XSSFRichTextString.cs
@@ -756,7 +756,7 @@ private ThemesTable GetThemesTable()
{
if(styles == null)
return null;
- return styles.GetTheme();
+ return styles.Theme;
}
}
}
diff --git a/ooxml/XSSF/UserModel/XSSFSheet.cs b/ooxml/XSSF/UserModel/XSSFSheet.cs
index f4fc5f20e..37150c014 100644
--- a/ooxml/XSSF/UserModel/XSSFSheet.cs
+++ b/ooxml/XSSF/UserModel/XSSFSheet.cs
@@ -950,7 +950,7 @@ public bool HasComments
return false;
}
- return sheetComments.GetNumberOfComments() > 0;
+ return sheetComments.NumberOfComments > 0;
}
}
@@ -963,7 +963,7 @@ internal int NumberOfComments
return 0;
}
- return sheetComments.GetNumberOfComments();
+ return sheetComments.NumberOfComments;
}
}
@@ -4138,7 +4138,7 @@ public void CopyTo(IWorkbook dest, string name, bool copyStyle, bool keepFormula
StylesTable styles = ((XSSFWorkbook) dest).GetStylesSource();
if(copyStyle && Workbook.NumberOfFonts > 0)
{
- foreach(XSSFFont font in ((XSSFWorkbook) Workbook).GetStylesSource().GetFonts())
+ foreach(XSSFFont font in ((XSSFWorkbook) Workbook).GetStylesSource().Fonts)
{
styles.PutFont(font);
}
diff --git a/ooxml/XSSF/UserModel/XSSFWorkbook.cs b/ooxml/XSSF/UserModel/XSSFWorkbook.cs
index 60b280a5c..e17f431a2 100644
--- a/ooxml/XSSF/UserModel/XSSFWorkbook.cs
+++ b/ooxml/XSSF/UserModel/XSSFWorkbook.cs
@@ -366,7 +366,7 @@ internal override void OnDocumentRead()
}
}
stylesSource.SetWorkbook(this);
- stylesSource.SetTheme(theme);
+ stylesSource.Theme = theme;
if (sharedStringSource == null)
{
@@ -1121,7 +1121,7 @@ public short NumberOfFonts
{
get
{
- return (short)stylesSource.GetFonts().Count;
+ return (short)stylesSource.Fonts.Count;
}
}
@@ -1867,7 +1867,7 @@ public StylesTable GetStylesSource()
public ThemesTable GetTheme()
{
if (stylesSource == null) return null;
- return stylesSource.GetTheme();
+ return stylesSource.Theme;
}
/**
diff --git a/testcases/ooxml/NPOI.OOXML.TestCases.Core.csproj b/testcases/ooxml/NPOI.OOXML.TestCases.Core.csproj
index 24573ba7c..bf948b49b 100644
--- a/testcases/ooxml/NPOI.OOXML.TestCases.Core.csproj
+++ b/testcases/ooxml/NPOI.OOXML.TestCases.Core.csproj
@@ -1,4 +1,4 @@
-
+
net472;net8.0
diff --git a/testcases/ooxml/XSSF/EventUserModel/TestReadOnlySharedStringsTable.cs b/testcases/ooxml/XSSF/EventUserModel/TestReadOnlySharedStringsTable.cs
new file mode 100644
index 000000000..a0fcc1382
--- /dev/null
+++ b/testcases/ooxml/XSSF/EventUserModel/TestReadOnlySharedStringsTable.cs
@@ -0,0 +1,125 @@
+/*
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ====================================================================
+ */
+
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace TestCases.XSSF.EventUserModel
+{
+
+ using System.Text.RegularExpressions;
+ using NPOI.OpenXml4Net.OPC;
+ using NPOI.XSSF.Model;
+ using NPOI.XSSF.UserModel;
+ using NPOI.XSSF.EventUserModel;
+ using NUnit.Framework;
+ using NPOI.OpenXmlFormats.Spreadsheet;
+ using NUnit.Framework.Legacy;
+
+ /// <summary>
+ /// Tests for <see cref="ReadOnlySharedStringsTable"/>
+ /// </summary>
+ [TestFixture]
+ public sealed class TestReadOnlySharedStringsTable
+ {
+ private static POIDataSamples _ssTests = POIDataSamples.GetSpreadSheetInstance();
+
+ [Test]
+ public void TestParse()
+ {
+     // Build a full and a read-only table from the same part; they must agree entry by entry.
+     OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("SampleSS.xlsx"));
+     List<PackagePart> parts = pkg.GetPartsByName(new Regex("/xl/sharedStrings.xml", RegexOptions.Compiled));
+     ClassicAssert.AreEqual(1, parts.Count);
+
+     SharedStringsTable stbl = new SharedStringsTable(parts[0]);
+     ReadOnlySharedStringsTable rtbl = new ReadOnlySharedStringsTable(parts[0]);
+
+     ClassicAssert.AreEqual(stbl.Count, rtbl.Count);
+     ClassicAssert.AreEqual(stbl.UniqueCount, rtbl.UniqueCount);
+
+     ClassicAssert.AreEqual(stbl.Items.Count, stbl.UniqueCount);
+     ClassicAssert.AreEqual(rtbl.Items.Count, rtbl.UniqueCount);
+     for(int i = 0; i < stbl.UniqueCount; i++)
+     {
+         CT_Rst i1 = stbl.GetEntryAt(i);
+         String i2 = rtbl.GetEntryAt(i);
+         ClassicAssert.AreEqual(i1.t, i2);
+     }
+     // NOTE(review): pkg is never closed here - confirm whether that is intentional.
+ }
+
+ // 51519 (sample file 51519.xlsx)
+ [Test]
+ public void TestPhoneticRuns()
+ {
+     // First pass: phonetic runs are included, so entry 3 carries the extra trailing run.
+     OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("51519.xlsx"));
+     List<PackagePart> parts = pkg.GetPartsByName(new Regex("/xl/sharedStrings.xml", RegexOptions.Compiled));
+     ClassicAssert.AreEqual(1, parts.Count);
+
+     ReadOnlySharedStringsTable rtbl = new ReadOnlySharedStringsTable(parts[0], true);
+     List<String> strings = rtbl.Items;
+     ClassicAssert.AreEqual(49, strings.Count);
+
+     ClassicAssert.AreEqual("\u30B3\u30E1\u30F3\u30C8", rtbl.GetEntryAt(0));
+     ClassicAssert.AreEqual("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3", rtbl.GetEntryAt(3));
+
+     // now do not include phonetic runs: same entry count, entry 3 loses the trailing run
+     rtbl = new ReadOnlySharedStringsTable(parts[0], false);
+     strings = rtbl.Items;
+     ClassicAssert.AreEqual(49, strings.Count);
+
+     ClassicAssert.AreEqual("\u30B3\u30E1\u30F3\u30C8", rtbl.GetEntryAt(0));
+     ClassicAssert.AreEqual("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.GetEntryAt(3));
+
+ }
+ [Test]
+ public void TestEmptySSTOnPackageObtainedViaWorkbook()
+ {
+     // Reach the package through a loaded workbook instead of opening it directly.
+     var workbook = new XSSFWorkbook(_ssTests.OpenResourceAsStream("noSharedStringTable.xlsx"));
+     OPCPackage package = workbook.Package;
+     assertEmptySST(package);
+     workbook.Close();
+ }
+ [Test]
+ public void TestEmptySSTOnPackageDirect()
+ {
+     // Open the package straight from the resource stream, with no workbook in between.
+     var resource = _ssTests.OpenResourceAsStream("noSharedStringTable.xlsx");
+     assertEmptySST(OPCPackage.Open(resource));
+ }
+
+ // Asserts that a read-only table built from the given package reports no strings at all.
+ private void assertEmptySST(OPCPackage pkg)
+ {
+     var table = new ReadOnlySharedStringsTable(pkg);
+     ClassicAssert.AreEqual(0, table.Count);
+     ClassicAssert.AreEqual(0, table.UniqueCount);
+     ClassicAssert.IsNull(table.Items); // same state it's left in if fed a package which has no SST part.
+ }
+
+ }
+}
+
diff --git a/testcases/ooxml/XSSF/EventUserModel/TestXSSFReader.cs b/testcases/ooxml/XSSF/EventUserModel/TestXSSFReader.cs
new file mode 100644
index 000000000..6149c4c94
--- /dev/null
+++ b/testcases/ooxml/XSSF/EventUserModel/TestXSSFReader.cs
@@ -0,0 +1,353 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace TestCases.XSSF.EventUserModel
+{
+ using NPOI;
+ using NPOI.OpenXml4Net.OPC;
+ using NPOI.Util;
+ using NPOI.XSSF;
+ using NPOI.XSSF.EventUserModel;
+ using NPOI.XSSF.Model;
+ using NPOI.XSSF.UserModel;
+ using NUnit.Framework;
+ using NUnit.Framework.Legacy;
+
+
+ /// <summary>
+ /// Tests for <see cref="XSSFReader"/>
+ /// </summary>
+ [TestFixture]
+ public sealed class TestXSSFReader
+ {
+ private static POIDataSamples _ssTests = POIDataSamples.GetSpreadSheetInstance();
+
+ [Test]
+ public void TestGetBits()
+ {
+     // Every *Data accessor and both parsed tables must be available for the sample workbook.
+     OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("SampleSS.xlsx"));
+     var reader = new XSSFReader(pkg);
+
+     // raw data accessors
+     ClassicAssert.IsNotNull(reader.WorkbookData);
+     ClassicAssert.IsNotNull(reader.SharedStringsData);
+     ClassicAssert.IsNotNull(reader.StylesData);
+     // parsed tables
+     ClassicAssert.IsNotNull(reader.SharedStringsTable);
+     ClassicAssert.IsNotNull(reader.StylesTable);
+ }
+
+ [Test]
+ public void TestStyles()
+ {
+     // Font and data-format counts are pinned to what SampleSS.xlsx contains.
+     OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("SampleSS.xlsx"));
+
+     var reader = new XSSFReader(pkg);
+
+     ClassicAssert.AreEqual(3, reader.StylesTable.Fonts.Count);
+     ClassicAssert.AreEqual(0, reader.StylesTable.NumDataFormats);
+
+     // The Styles Table should have the themes associated with it too
+     ClassicAssert.IsNotNull(reader.StylesTable.Theme);
+
+     // Check we get valid data for the two
+     ClassicAssert.IsNotNull(reader.StylesData);
+     ClassicAssert.IsNotNull(reader.ThemesData);
+ }
+
+ [Test]
+ public void TestStrings()
+ {
+     // The shared strings table must expose all 11 entries, the first being the title cell text.
+     OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("SampleSS.xlsx"));
+     var reader = new XSSFReader(pkg);
+
+     ClassicAssert.AreEqual(11, reader.SharedStringsTable.Items.Count);
+     var firstEntry = new XSSFRichTextString(reader.SharedStringsTable.GetEntryAt(0));
+     ClassicAssert.AreEqual("Test spreadsheet", firstEntry.ToString());
+ }
+
+ [Test]
+ public void TestSheets()
+ {
+     OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("SampleSS.xlsx"));
+     XSSFReader r = new XSSFReader(pkg);
+     byte[] data = new byte[4096];
+
+     // By r:id - open the sheet stream once and release it after reading
+     Stream byId = r.GetSheet("rId2");
+     ClassicAssert.IsNotNull(byId);
+     int read = IOUtils.ReadFully(byId, data);
+     byId.Close();
+     ClassicAssert.AreEqual(974, read);
+
+     // All
+     IEnumerator<Stream> it = r.GetSheetsData();
+     int count = 0;
+     while(it.MoveNext())
+     {
+         count++;
+         Stream inp = it.Current;
+         ClassicAssert.IsNotNull(inp);
+         read = IOUtils.ReadFully(inp, data);
+         inp.Close();
+
+         // each sheet is expected to yield between 400 and 1500 bytes
+         ClassicAssert.IsTrue(read > 400);
+         ClassicAssert.IsTrue(read < 1500);
+     }
+     ClassicAssert.AreEqual(3, count);
+ }
+
+ /// <summary>
+ /// Check that the sheet iterator returns sheets in the logical order
+ /// (as they are defined in the workbook.xml)
+ /// </summary>
+ [Test]
+ public void TestOrderOfSheets()
+ {
+     OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("reordered_sheets.xlsx"));
+     var reader = new XSSFReader(pkg);
+     var sheets = (XSSFReader.SheetIterator)reader.GetSheetsData();
+
+     // Names in the order the iterator is expected to produce them.
+     String[] expectedNames = {"Sheet4", "Sheet2", "Sheet3", "Sheet1"};
+
+     int visited = 0;
+     while(sheets.MoveNext())
+     {
+         Stream sheetStream = sheets.Current;
+         ClassicAssert.IsNotNull(sheetStream);
+         sheetStream.Close();
+
+         ClassicAssert.AreEqual(expectedNames[visited], sheets.SheetName);
+         visited++;
+     }
+
+     ClassicAssert.AreEqual(4, visited);
+ }
+ [Test]
+ public void TestComments()
+ {
+     // Only the first sheet of comments.xlsx is expected to expose a comments table.
+     OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("comments.xlsx");
+     var reader = new XSSFReader(pkg);
+     var sheets = (XSSFReader.SheetIterator)reader.GetSheetsData();
+
+     int sheetNumber = 0;
+     while(sheets.MoveNext())
+     {
+         sheetNumber++;
+         Stream sheetStream = sheets.Current;
+         sheetStream.Close();
+
+         if(sheetNumber != 1)
+         {
+             ClassicAssert.IsNull(sheets.SheetComments);
+             continue;
+         }
+
+         ClassicAssert.IsNotNull(sheets.SheetComments);
+         CommentsTable comments = sheets.SheetComments;
+         ClassicAssert.AreEqual(1, comments.NumberOfAuthors);
+         ClassicAssert.AreEqual(3, comments.NumberOfComments);
+     }
+
+     ClassicAssert.AreEqual(3, sheetNumber);
+ }
+
+ /// <summary>
+ /// Iterating over a workbook with chart sheets in it, using the
+ /// XSSFReader method
+ /// </summary>
+ /// <remarks>There are no assertions; the test passes when no exception escapes.</remarks>
+ [Test]
+ public void Test50119()
+ {
+     OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("WithChartSheet.xlsx");
+     var reader = new XSSFReader(pkg);
+     var sheets = (XSSFReader.SheetIterator)reader.GetSheetsData();
+
+     // Walk every sheet, fetching and immediately closing its stream.
+     while(sheets.MoveNext())
+     {
+         Stream sheetStream = sheets.Current;
+         sheetStream.Close();
+     }
+ }
+
+ /// <summary>
+ /// Test text extraction from text box using the sheet iterator's Shapes
+ /// </summary>
+ /// <remarks>Sample file: WithTextBox.xlsx.</remarks>
+ [Test]
+ public void TestShapes()
+ {
+     OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("WithTextBox.xlsx");
+     var reader = new XSSFReader(pkg);
+     var sheets = (XSSFReader.SheetIterator)reader.GetSheetsData();
+
+     String text = GetShapesString(sheets);
+     foreach(String expected in new[] { "Line 1", "Line 2", "Line 3" })
+     {
+         StringAssert.Contains(expected, text);
+     }
+ }
+
+ // Collects the Text of every XSSFSimpleShape the iterator exposes, one '\n'-terminated line per shape.
+ private String GetShapesString(XSSFReader.SheetIterator it)
+ {
+     StringBuilder sb = new StringBuilder();
+     while(it.MoveNext())
+     {
+         var _ = it.Current;
+         List<XSSFShape> shapes = it.Shapes;
+         if(shapes != null)
+         {
+             foreach(XSSFShape shape in shapes)
+             {
+                 if(shape is XSSFSimpleShape simple)
+                 {
+                     sb.Append(simple.Text).Append('\n');
+                 }
+             }
+         }
+     }
+     return sb.ToString();
+ }
+ [Test]
+ public void TestBug57914()
+ {
+ // Constructing an XSSFReader over 57914.xlsx is currently expected to throw.
+ OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("57914.xlsx");
+ XSSFReader r;
+
+ // For now we expect construction to fail. Once bug 57699 is fixed, the Assert.Fail below
+ // will trip, which is the signal that this test needs to be adjusted as well.
+ try
+ {
+ r = new XSSFReader(pkg);
+ Assert.Fail("This will Assert.Fail until bug 57699 is fixed");
+ }
+ catch(POIXMLException e)
+ {
+ StringAssert.Contains("57699", e.Message);
+ return;
+ }
+ // NOTE(review): the code below looks unreachable today (Assert.Fail or the return above) - confirm.
+ XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) r.GetSheetsData();
+
+ String text = GetShapesString(it);
+ StringAssert.Contains("Line 1", text);
+ StringAssert.Contains("Line 2", text);
+ StringAssert.Contains("Line 3", text);
+ }
+
+ /// <summary>
+ /// NPE from the XSSFReader.SheetIterator constructor on XLSX files generated by
+ /// the openpyxl library
+ /// </summary>
+ [Test]
+ public void Test58747()
+ {
+     OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("58747.xlsx");
+     var sharedStrings = new ReadOnlySharedStringsTable(pkg);
+     ClassicAssert.IsNotNull(sharedStrings);
+     var reader = new XSSFReader(pkg);
+     StylesTable styles = reader.StylesTable;
+     ClassicAssert.IsNotNull(styles);
+
+     // Exactly one sheet is expected; its name is still readable after the iterator is exhausted.
+     var sheets = (XSSFReader.SheetIterator)reader.GetSheetsData();
+     ClassicAssert.AreEqual(true, sheets.MoveNext());
+     var _ = sheets.Current;
+
+     ClassicAssert.AreEqual(false, sheets.MoveNext());
+     ClassicAssert.AreEqual("Orders", sheets.SheetName);
+
+     pkg.Close();
+ }
+
+ /// <summary>
+ /// NPE when sheet has no relationship id in the workbook
+ /// (sample file 60825.xlsx)
+ /// </summary>
+ [Test]
+ public void TestSheetWithNoRelationshipId()
+ {
+     OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("60825.xlsx");
+     var sharedStrings = new ReadOnlySharedStringsTable(pkg);
+     ClassicAssert.IsNotNull(sharedStrings);
+     var reader = new XSSFReader(pkg);
+     StylesTable styles = reader.StylesTable;
+     ClassicAssert.IsNotNull(styles);
+
+     // The iterator must yield exactly one usable sheet stream.
+     var sheets = (XSSFReader.SheetIterator)reader.GetSheetsData();
+     sheets.MoveNext();
+     ClassicAssert.IsNotNull(sheets.Current);
+     ClassicAssert.IsFalse(sheets.MoveNext());
+
+     pkg.Close();
+ }
+
+ /// <summary>
+ /// <para>
+ /// Bug 61034: call to XSSFReader.GetSheetsData() returns duplicate sheets.
+ /// </para>
+ /// <para>
+ /// The problem seems to be caused only by those xlsx files which have a specific
+ /// order of the attributes inside the &lt;sheet&gt; tag of workbook.xml
+ /// </para>
+ /// <para>
+ /// Example (which causes the problems):
+ /// &lt;sheet name="Sheet6" r:id="rId6" sheetId="4"/&gt;
+ /// </para>
+ /// <para>
+ /// While this one works correctly:
+ /// &lt;sheet name="Sheet6" sheetId="4" r:id="rId6"/&gt;
+ /// </para>
+ /// </summary>
+ [Test]
+ public void Test61034()
+ {
+     OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("61034.xlsx");
+     XSSFReader reader = new XSSFReader(pkg);
+     XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) reader.GetSheetsData();
+     ISet<string> seen = new HashSet<string>();
+     while(iter.MoveNext())
+     {
+         Stream stream = iter.Current;
+         String sheetName = iter.SheetName;
+         CollectionAssert.DoesNotContain(seen, sheetName);
+         seen.Add(sheetName);
+         stream.Close();
+     }
+     pkg.Close();
+ }
+ }
+}
+
diff --git a/testcases/ooxml/XSSF/Extractor/TestXSSFEventBasedExcelExtractor.cs b/testcases/ooxml/XSSF/Extractor/TestXSSFEventBasedExcelExtractor.cs
new file mode 100644
index 000000000..76f2e8082
--- /dev/null
+++ b/testcases/ooxml/XSSF/Extractor/TestXSSFEventBasedExcelExtractor.cs
@@ -0,0 +1,435 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace TestCases.XSSF.Extractor
+{
+
+ using NPOI;
+ using NPOI.HSSF;
+ using NPOI.HSSF.Extractor;
+ using NPOI.XSSF;
+ using NPOI.XSSF.Extractor;
+ using NUnit.Framework;
+ using NUnit.Framework.Legacy;
+ using System.Text.RegularExpressions;
+ using TestCases.HSSF;
+
+ /// <summary>
+ /// Tests for <see cref="XSSFEventBasedExcelExtractor"/>
+ /// </summary>
+ [TestFixture]
+ public class TestXSSFEventBasedExcelExtractor
+ {
+ protected XSSFEventBasedExcelExtractor GetExtractor(String sampleName)
+ {
+     var package = XSSFTestDataSamples.OpenSamplePackage(sampleName);
+     return new XSSFEventBasedExcelExtractor(package);
+ }
+
+ /// <summary>
+ /// Get text out of the simple file
+ /// </summary>
+ [Test]
+ public void TestGetSimpleText()
+ {
+
+ // a very simple file; Text is read once and discarded, then read again for the assertions
+ XSSFEventBasedExcelExtractor extractor = GetExtractor("sample.xlsx");
+ var _ = extractor.Text;
+
+ String text = extractor.Text;
+ ClassicAssert.IsTrue(text.Length > 0);
+
+ // Check sheet names
+ POITestCase.AssertStartsWith(text, "Sheet1");
+ POITestCase.AssertEndsWith(text, "Sheet3\n");
+
+ // Now without sheet names, only the cell text is expected
+ extractor.IncludeSheetNames = (false);
+ text = extractor.Text;
+ String CHUNK1 =
+ "Lorem\t111\n" +
+ "ipsum\t222\n" +
+ "dolor\t333\n" +
+ "sit\t444\n" +
+ "amet\t555\n" +
+ "consectetuer\t666\n" +
+ "adipiscing\t777\n" +
+ "elit\t888\n" +
+ "Nunc\t999\n";
+ String CHUNK2 =
+ "The quick brown fox jumps over the lazy dog\n" +
+ "hello, xssf hello, xssf\n" +
+ "hello, xssf hello, xssf\n" +
+ "hello, xssf hello, xssf\n" +
+ "hello, xssf hello, xssf\n";
+ ClassicAssert.AreEqual(
+ CHUNK1 +
+ "at\t4995\n" +
+ CHUNK2
+ , text);
+
+ // Now get formulas, not their values
+ extractor.FormulasNotResults = (true);
+ text = extractor.Text;
+ ClassicAssert.AreEqual(
+ CHUNK1 +
+ "at\tSUM(B1:B9)\n" +
+ CHUNK2, text);
+
+ // With sheet names too
+ extractor.IncludeSheetNames = (true);
+ text = extractor.Text;
+ ClassicAssert.AreEqual(
+ "Sheet1\n" +
+ CHUNK1 +
+ "at\tSUM(B1:B9)\n" +
+ "rich test\n" +
+ CHUNK2 +
+ "Sheet3\n"
+ , text);
+
+ extractor.Close();
+ }
+
+ [Test]
+ public void TestGetComplexText()
+ {
+     // A fairly complex file
+     XSSFEventBasedExcelExtractor extractor = GetExtractor("AverageTaxRates.xlsx");
+     // The first extraction is discarded; the assertions run against a second call.
+     var _ = extractor.Text;
+
+     String text = extractor.Text;
+     ClassicAssert.IsTrue(text.Length > 0);
+
+     // Might not have all formatting it should do!
+     String expectedStart =
+         "Avgtxfull\n" +
+         "(iii) AVERAGE TAX RATES ON ANNUAL";
+     POITestCase.AssertStartsWith(text, expectedStart);
+
+     extractor.Close();
+ }
+
+ [Test]
+ public void TestInlineStrings()
+ {
+     XSSFEventBasedExcelExtractor extractor = GetExtractor("InlineStrings.xlsx");
+     extractor.FormulasNotResults = true;
+     String text = extractor.Text;
+
+     // Fragments that must all appear in the extracted text, checked in this order.
+     String[] expectedFragments =
+     {
+         // Numbers
+         "43", "22",
+         // Strings
+         "ABCDE", "Long Text",
+         // Inline Strings
+         "1st Inline String", "And More",
+         // Formulas
+         "A2", "A5-A$2",
+     };
+     foreach(String fragment in expectedFragments)
+     {
+         POITestCase.AssertContains(text, fragment);
+     }
+
+     extractor.Close();
+ }
+
+ /// <summary>
+ /// Test that we return pretty much the same as
+ /// ExcelExtractor does, when we're both passed
+ /// the same file, just saved as xls and xlsx
+ /// </summary>
+ [Test]
+ public void TestComparedToOLE2()
+ {
+     // A fairly simple file - ooxml
+     XSSFEventBasedExcelExtractor ooxmlExtractor = GetExtractor("SampleSS.xlsx");
+
+     // ... and the xls flavour of the same sample
+     var ole2Extractor = new ExcelExtractor(
+         HSSFTestDataSamples.OpenSampleWorkbook("SampleSS.xls"));
+
+     POITextExtractor[] extractors = { ooxmlExtractor, ole2Extractor };
+     var tailPattern = new Regex(".*13(\\.0+)?\\s+Sheet3.*", RegexOptions.Compiled | RegexOptions.Singleline);
+
+     foreach(POITextExtractor extractor in extractors)
+     {
+         // Carriage returns and tabs are stripped before the comparison.
+         String text = extractor.Text.Replace("\r", "").Replace("\t", "");
+         POITestCase.AssertStartsWith(text, "First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n");
+         ClassicAssert.IsTrue(tailPattern.IsMatch(text));
+     }
+
+     ole2Extractor.Close();
+     ooxmlExtractor.Close();
+ }
+
+ /// <summary>
+ /// Test text extraction from text box through the event-based extractor
+ /// </summary>
+ [Test]
+ public void TestShapes()
+ {
+     XSSFEventBasedExcelExtractor ooxmlExtractor = GetExtractor("WithTextBox.xlsx");
+     try
+     {
+         String text = ooxmlExtractor.Text;
+         foreach(String expected in new[] { "Line 1", "Line 2", "Line 3" })
+         {
+             StringAssert.Contains(expected, text);
+         }
+     }
+     finally
+     {
+         ooxmlExtractor.Close();
+     }
+ }
+
+ /// <summary>
+ /// Test that we return the same output for unstyled numbers as the
+ /// non-event-based XSSFExcelExtractor.
+ /// </summary>
+ [Test]
+ public void TestUnstyledNumbersComparedToNonEventBasedExtractor()
+ {
+     String expectedOutput = "Sheet1\n99.99\n";
+     var userModelExtractor = new XSSFExcelExtractor(
+         XSSFTestDataSamples.OpenSampleWorkbook("56011.xlsx"));
+     try
+     {
+         // Commas are mapped to dots before comparing (presumably to tolerate decimal-comma cultures).
+         ClassicAssert.AreEqual(expectedOutput, userModelExtractor.Text.Replace(",", "."));
+     }
+     finally
+     {
+         userModelExtractor.Close();
+     }
+
+     var eventExtractor = new XSSFEventBasedExcelExtractor(
+         XSSFTestDataSamples.OpenSamplePackage("56011.xlsx"));
+     try
+     {
+         ClassicAssert.AreEqual(expectedOutput, eventExtractor.Text.Replace(",", "."));
+     }
+     finally
+     {
+         eventExtractor.Close();
+     }
+ }
+
+ /// <summary>
+ /// Test that we return the same output headers and footers as the
+ /// non-event-based XSSFExcelExtractor.
+ /// </summary>
+ [Test]
+ public void TestHeadersAndFootersComparedToNonEventBasedExtractor()
+ {
+ String expectedOutputWithHeadersAndFooters =
+ "Sheet1\n" +
+ "&\"Calibri,Regular\"&K000000top left\t&\"Calibri,Regular\"&K000000top center\t&\"Calibri,Regular\"&K000000top right\n" +
+ "abc\t123\n" +
+ "&\"Calibri,Regular\"&K000000bottom left\t&\"Calibri,Regular\"&K000000bottom center\t&\"Calibri,Regular\"&K000000bottom right\n";
+
+ String expectedOutputWithoutHeadersAndFooters =
+ "Sheet1\n" +
+ "abc\t123\n";
+ // First the non-event-based extractor: headers/footers are on by default, then switched off.
+ XSSFExcelExtractor extractor = new XSSFExcelExtractor(
+ XSSFTestDataSamples.OpenSampleWorkbook("headerFooterTest.xlsx"));
+ try
+ {
+ ClassicAssert.AreEqual(expectedOutputWithHeadersAndFooters, extractor.Text);
+ extractor.IncludeHeadersFooters = (false);
+ ClassicAssert.AreEqual(expectedOutputWithoutHeadersAndFooters, extractor.Text);
+ }
+ finally
+ {
+ extractor.Close();
+ }
+ // Then the event-based extractor must produce exactly the same two outputs.
+ XSSFEventBasedExcelExtractor fixture =
+ new XSSFEventBasedExcelExtractor(
+ XSSFTestDataSamples.OpenSamplePackage("headerFooterTest.xlsx"));
+ try
+ {
+ ClassicAssert.AreEqual(expectedOutputWithHeadersAndFooters, fixture.Text);
+ fixture.IncludeHeadersFooters = (false);
+ ClassicAssert.AreEqual(expectedOutputWithoutHeadersAndFooters, fixture.Text);
+ }
+ finally
+ {
+ fixture.Close();
+ }
+ }
+
+ /// <summary>
+ /// <para>
+ /// Test that XSSFEventBasedExcelExtractor outputs comments when specified.
+ /// The output will contain two improvements over the output from
+ /// XSSFExcelExtractor in that (1) comments from empty cells will be
+ /// outputted, and (2) the author will not be outputted twice.
+ /// </para>
+ /// <para>
+ /// This test will need to be modified if these improvements are ported to
+ /// XSSFExcelExtractor.
+ /// </para>
+ /// </summary>
+ [Test]
+ public void TestCommentsComparedToNonEventBasedExtractor()
+ {
+ String expectedOutputWithoutComments =
+ "Sheet1\n" +
+ "\n" +
+ "abc\n" +
+ "\n" +
+ "123\n" +
+ "\n" +
+ "\n" +
+ "\n";
+ // Non-event-based output: comments only on non-empty cells, author name repeated.
+ String nonEventBasedExtractorOutputWithComments =
+ "Sheet1\n" +
+ "\n" +
+ "abc Comment by Shaun Kalley: Shaun Kalley: Comment A2\n" +
+ "\n" +
+ "123 Comment by Shaun Kalley: Shaun Kalley: Comment B4\n" +
+ "\n" +
+ "\n" +
+ "\n";
+ // Event-based output: comments for empty cells too, author name given once.
+ String eventBasedExtractorOutputWithComments =
+ "Sheet1\n" +
+ "Comment by Shaun Kalley: Comment A1\tComment by Shaun Kalley: Comment B1\n" +
+ "abc Comment by Shaun Kalley: Comment A2\tComment by Shaun Kalley: Comment B2\n" +
+ "Comment by Shaun Kalley: Comment A3\tComment by Shaun Kalley: Comment B3\n" +
+ "Comment by Shaun Kalley: Comment A4\t123 Comment by Shaun Kalley: Comment B4\n" +
+ "Comment by Shaun Kalley: Comment A5\tComment by Shaun Kalley: Comment B5\n" +
+ "Comment by Shaun Kalley: Comment A7\tComment by Shaun Kalley: Comment B7\n" +
+ "Comment by Shaun Kalley: Comment A8\tComment by Shaun Kalley: Comment B8\n";
+
+ XSSFExcelExtractor extractor = new XSSFExcelExtractor(
+ XSSFTestDataSamples.OpenSampleWorkbook("commentTest.xlsx"));
+ try
+ {
+ extractor.AddTabEachEmptyCell = false;
+ ClassicAssert.AreEqual(expectedOutputWithoutComments, extractor.Text);
+ extractor.IncludeCellComments = (true);
+ ClassicAssert.AreEqual(nonEventBasedExtractorOutputWithComments, extractor.Text);
+ }
+ finally
+ {
+ extractor.Close();
+ }
+
+ XSSFEventBasedExcelExtractor fixture =
+ new XSSFEventBasedExcelExtractor(
+ XSSFTestDataSamples.OpenSamplePackage("commentTest.xlsx"));
+ try
+ {
+ ClassicAssert.AreEqual(expectedOutputWithoutComments, fixture.Text);
+ fixture.IncludeCellComments = (true);
+ ClassicAssert.AreEqual(eventBasedExtractorOutputWithComments, fixture.Text);
+ }
+ finally
+ {
+ fixture.Close();
+ }
+ }
+
+ [Test]
+ public void TestFile56278_normal()
+ {
+     // first with normal Text Extractor
+     POIXMLTextExtractor extractor =
+         new XSSFExcelExtractor(XSSFTestDataSamples.OpenSampleWorkbook("56278.xlsx"));
+     try
+     {
+         String text = extractor.Text;
+         ClassicAssert.IsNotNull(text);
+     }
+     finally
+     {
+         extractor.Close();
+     }
+ }
+
+ [Test]
+ public void TestFile56278_event()
+ {
+     // then with event based one
+     POIXMLTextExtractor extractor = GetExtractor("56278.xlsx");
+     try
+     {
+         String text = extractor.Text;
+         ClassicAssert.IsNotNull(text);
+     }
+     finally
+     {
+         extractor.Close();
+     }
+ }
+
+ [Test]
+ public void Test59021()
+ {
+     var extractor = new XSSFEventBasedExcelExtractor(
+         XSSFTestDataSamples.OpenSamplePackage("59021.xlsx"));
+     String text = extractor.Text;
+
+     // Both a text fragment and a formatted date must appear in the extracted text.
+     StringAssert.Contains("Abkhazia - Fixed", text);
+     StringAssert.Contains("10/02/2016", text);
+     extractor.Close();
+ }
+
+ [Test]
+ public void Test51519()
+ {
+     // Base text followed by its phonetic run, as emitted when runs are concatenated.
+     String withPhoneticRun = "\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3";
+
+     //default behavior: include phonetic runs
+     var extractor = new XSSFEventBasedExcelExtractor(
+         XSSFTestDataSamples.OpenSamplePackage("51519.xlsx"));
+     String text = extractor.Text;
+     StringAssert.Contains(withPhoneticRun, text);
+     extractor.Close();
+
+     //now try turning them off
+     extractor = new XSSFEventBasedExcelExtractor(
+         XSSFTestDataSamples.OpenSamplePackage("51519.xlsx"));
+     extractor.SetConcatenatePhoneticRuns(false);
+     text = extractor.Text;
+     ClassicAssert.IsFalse(text.Contains(withPhoneticRun),
+         "should not be able to find appended phonetic run");
+     extractor.Close();
+ }
+ }
+}
+
diff --git a/testcases/ooxml/XSSF/Extractor/TestXSSFExcelExtractor.cs b/testcases/ooxml/XSSF/Extractor/TestXSSFExcelExtractor.cs
index 0c60d000d..23c580f5a 100644
--- a/testcases/ooxml/XSSF/Extractor/TestXSSFExcelExtractor.cs
+++ b/testcases/ooxml/XSSF/Extractor/TestXSSFExcelExtractor.cs
@@ -55,7 +55,7 @@ public void TestGetSimpleText()
ClassicAssert.IsTrue(text.EndsWith("Sheet3\n"));
// Now without, will have text
- extractor.SetIncludeSheetNames(false);
+ extractor.IncludeSheetNames = false;
text = extractor.Text;
string CHUNK1 =
"Lorem\t111\n" +
@@ -80,7 +80,7 @@ public void TestGetSimpleText()
, text);
// Now Get formulas not their values
- extractor.SetFormulasNotResults(true);
+ extractor.FormulasNotResults = true;
text = extractor.Text;
ClassicAssert.AreEqual(
CHUNK1 +
@@ -88,7 +88,7 @@ public void TestGetSimpleText()
CHUNK2, text);
// With sheet names too
- extractor.SetIncludeSheetNames(true);
+ extractor.IncludeSheetNames = true;
text = extractor.Text;
ClassicAssert.AreEqual(
"Sheet1\n" +
@@ -184,7 +184,7 @@ public void TestComments()
ClassicAssert.IsFalse(text.Contains("test phrase"), "Unable to find expected word in text\n" + text);
// Turn on comment extraction, will then be
- extractor.SetIncludeCellComments(true);
+ extractor.IncludeCellComments = true;
text = extractor.Text;
ClassicAssert.IsTrue(text.Contains("testdoc"), "Unable to find expected word in text\n" + text);
ClassicAssert.IsTrue(text.Contains("test phrase"), "Unable to find expected word in text\n" + text);
@@ -256,7 +256,7 @@ public void TestTextBoxes()
XSSFExcelExtractor extractor = GetExtractor("WithTextBox.xlsx");
try
{
- extractor.SetFormulasNotResults(true);
+ extractor.FormulasNotResults = true;
string text = extractor.Text;
ClassicAssert.IsTrue(text.IndexOf("Line 1") > -1);
ClassicAssert.IsTrue(text.IndexOf("Line 2") > -1);
diff --git a/testcases/ooxml/XSSF/Model/TestCommentsTable.cs b/testcases/ooxml/XSSF/Model/TestCommentsTable.cs
index bde3999bb..c8fcfd9d8 100644
--- a/testcases/ooxml/XSSF/Model/TestCommentsTable.cs
+++ b/testcases/ooxml/XSSF/Model/TestCommentsTable.cs
@@ -38,7 +38,7 @@ public class TestCommentsTable
public void FindAuthor()
{
CommentsTable sheetComments = new CommentsTable();
- ClassicAssert.AreEqual(1, sheetComments.GetNumberOfAuthors());
+ ClassicAssert.AreEqual(1, sheetComments.NumberOfAuthors);
ClassicAssert.AreEqual(0, sheetComments.FindAuthor(""));
ClassicAssert.AreEqual("", sheetComments.GetAuthor(0));
@@ -205,22 +205,22 @@ public void RemoveComment()
ClassicAssert.AreSame(a1, sheetComments.GetCTComment(addrA1));
ClassicAssert.AreSame(a2, sheetComments.GetCTComment(addrA2));
ClassicAssert.AreSame(a3, sheetComments.GetCTComment(addrA3));
- ClassicAssert.AreEqual(3, sheetComments.GetNumberOfComments());
+ ClassicAssert.AreEqual(3, sheetComments.NumberOfComments);
ClassicAssert.IsTrue(sheetComments.RemoveComment(addrA1));
- ClassicAssert.AreEqual(2, sheetComments.GetNumberOfComments());
+ ClassicAssert.AreEqual(2, sheetComments.NumberOfComments);
ClassicAssert.IsNull(sheetComments.GetCTComment(addrA1));
ClassicAssert.AreSame(a2, sheetComments.GetCTComment(addrA2));
ClassicAssert.AreSame(a3, sheetComments.GetCTComment(addrA3));
ClassicAssert.IsTrue(sheetComments.RemoveComment(addrA2));
- ClassicAssert.AreEqual(1, sheetComments.GetNumberOfComments());
+ ClassicAssert.AreEqual(1, sheetComments.NumberOfComments);
ClassicAssert.IsNull(sheetComments.GetCTComment(addrA1));
ClassicAssert.IsNull(sheetComments.GetCTComment(addrA2));
ClassicAssert.AreSame(a3, sheetComments.GetCTComment(addrA3));
ClassicAssert.IsTrue(sheetComments.RemoveComment(addrA3));
- ClassicAssert.AreEqual(0, sheetComments.GetNumberOfComments());
+ ClassicAssert.AreEqual(0, sheetComments.NumberOfComments);
ClassicAssert.IsNull(sheetComments.GetCTComment(addrA1));
ClassicAssert.IsNull(sheetComments.GetCTComment(addrA2));
ClassicAssert.IsNull(sheetComments.GetCTComment(addrA3));
diff --git a/testcases/ooxml/XSSF/Model/TestStylesTable.cs b/testcases/ooxml/XSSF/Model/TestStylesTable.cs
index 6e36cfa9b..87f5bbf1a 100644
--- a/testcases/ooxml/XSSF/Model/TestStylesTable.cs
+++ b/testcases/ooxml/XSSF/Model/TestStylesTable.cs
@@ -100,7 +100,7 @@ public void doTestExisting(StylesTable st)
ClassicAssert.AreEqual(1, st.StyleXfsSize);
ClassicAssert.AreEqual(8, st.NumDataFormats);
- ClassicAssert.AreEqual(2, st.GetFonts().Count);
+ ClassicAssert.AreEqual(2, st.Fonts.Count);
ClassicAssert.AreEqual(2, st.GetFills().Count);
ClassicAssert.AreEqual(1, st.GetBorders().Count);
diff --git a/testcases/ooxml/XSSF/Model/TestThemesTable.cs b/testcases/ooxml/XSSF/Model/TestThemesTable.cs
index d18bd9f30..a089d1aa6 100644
--- a/testcases/ooxml/XSSF/Model/TestThemesTable.cs
+++ b/testcases/ooxml/XSSF/Model/TestThemesTable.cs
@@ -249,16 +249,16 @@ public void TestAddNew()
ClassicAssert.AreEqual(null, wb.GetTheme());
StylesTable styles = wb.GetStylesSource();
- ClassicAssert.AreEqual(null, styles.GetTheme());
+ ClassicAssert.AreEqual(null, styles.Theme);
styles.EnsureThemesTable();
- ClassicAssert.IsNotNull(styles.GetTheme());
+ ClassicAssert.IsNotNull(styles.Theme);
ClassicAssert.IsNotNull(wb.GetTheme());
wb = XSSFTestDataSamples.WriteOutAndReadBack(wb) as XSSFWorkbook;
styles = wb.GetStylesSource();
- ClassicAssert.IsNotNull(styles.GetTheme());
+ ClassicAssert.IsNotNull(styles.Theme);
ClassicAssert.IsNotNull(wb.GetTheme());
}
}
diff --git a/testcases/ooxml/XSSF/UserModel/TestXSSFBugs.cs b/testcases/ooxml/XSSF/UserModel/TestXSSFBugs.cs
index 4bef56af4..77f454b42 100644
--- a/testcases/ooxml/XSSF/UserModel/TestXSSFBugs.cs
+++ b/testcases/ooxml/XSSF/UserModel/TestXSSFBugs.cs
@@ -1415,7 +1415,7 @@ public void Test51850()
// Sheet 2 has comments
ClassicAssert.IsNotNull(sh2.GetCommentsTable(false));
- ClassicAssert.AreEqual(1, sh2.GetCommentsTable(false).GetNumberOfComments());
+ ClassicAssert.AreEqual(1, sh2.GetCommentsTable(false).NumberOfComments);
// Sheet 1 doesn't (yet)
ClassicAssert.IsNull(sh1.GetCommentsTable(false));
@@ -1464,10 +1464,10 @@ public void Test51850()
// Check the comments
ClassicAssert.IsNotNull(sh2.GetCommentsTable(false));
- ClassicAssert.AreEqual(1, sh2.GetCommentsTable(false).GetNumberOfComments());
+ ClassicAssert.AreEqual(1, sh2.GetCommentsTable(false).NumberOfComments);
ClassicAssert.IsNotNull(sh1.GetCommentsTable(false));
- ClassicAssert.AreEqual(2, sh1.GetCommentsTable(false).GetNumberOfComments());
+ ClassicAssert.AreEqual(2, sh1.GetCommentsTable(false).NumberOfComments);
wb2.Close();
}
diff --git a/testcases/ooxml/XSSF/UserModel/TestXSSFComment.cs b/testcases/ooxml/XSSF/UserModel/TestXSSFComment.cs
index 548648621..3c217cb68 100644
--- a/testcases/ooxml/XSSF/UserModel/TestXSSFComment.cs
+++ b/testcases/ooxml/XSSF/UserModel/TestXSSFComment.cs
@@ -53,7 +53,7 @@ public void Constructor()
ClassicAssert.IsNotNull(sheetComments.GetCTComments().commentList);
ClassicAssert.IsNotNull(sheetComments.GetCTComments().authors);
ClassicAssert.AreEqual(1, sheetComments.GetCTComments().authors.SizeOfAuthorArray());
- ClassicAssert.AreEqual(1, sheetComments.GetNumberOfAuthors());
+ ClassicAssert.AreEqual(1, sheetComments.NumberOfAuthors);
CT_Comment ctComment = sheetComments.NewComment(CellAddress.A1);
CT_Shape vmlShape = new CT_Shape();
@@ -167,17 +167,17 @@ public void Author()
CommentsTable sheetComments = new CommentsTable();
CT_Comment ctComment = sheetComments.NewComment(CellAddress.A1);
- ClassicAssert.AreEqual(1, sheetComments.GetNumberOfAuthors());
+ ClassicAssert.AreEqual(1, sheetComments.NumberOfAuthors);
XSSFComment comment = new XSSFComment(sheetComments, ctComment, null);
ClassicAssert.AreEqual("", comment.Author);
comment.Author = ("Apache POI");
ClassicAssert.AreEqual("Apache POI", comment.Author);
- ClassicAssert.AreEqual(2, sheetComments.GetNumberOfAuthors());
+ ClassicAssert.AreEqual(2, sheetComments.NumberOfAuthors);
comment.Author = ("Apache POI");
- ClassicAssert.AreEqual(2, sheetComments.GetNumberOfAuthors());
+ ClassicAssert.AreEqual(2, sheetComments.NumberOfAuthors);
comment.Author = ("");
ClassicAssert.AreEqual("", comment.Author);
- ClassicAssert.AreEqual(2, sheetComments.GetNumberOfAuthors());
+ ClassicAssert.AreEqual(2, sheetComments.NumberOfAuthors);
}
[Test]
diff --git a/testcases/ooxml/XSSF/UserModel/TestXSSFWorkbook.cs b/testcases/ooxml/XSSF/UserModel/TestXSSFWorkbook.cs
index 054f0ab77..cac07ef33 100644
--- a/testcases/ooxml/XSSF/UserModel/TestXSSFWorkbook.cs
+++ b/testcases/ooxml/XSSF/UserModel/TestXSSFWorkbook.cs
@@ -279,7 +279,7 @@ public void Styles()
// Has 8 number formats
ClassicAssert.AreEqual(8, st.NumDataFormats);
// Has 2 fonts
- ClassicAssert.AreEqual(2, st.GetFonts().Count);
+ ClassicAssert.AreEqual(2, st.Fonts.Count);
// Has 2 Fills
ClassicAssert.AreEqual(2, st.GetFills().Count);
// Has 1 border
@@ -303,7 +303,7 @@ public void Styles()
ClassicAssert.IsNotNull(ss);
ClassicAssert.AreEqual(10, st.NumDataFormats);
- ClassicAssert.AreEqual(2, st.GetFonts().Count);
+ ClassicAssert.AreEqual(2, st.Fonts.Count);
ClassicAssert.AreEqual(2, st.GetFills().Count);
ClassicAssert.AreEqual(1, st.GetBorders().Count);
diff --git a/testcases/test-data/spreadsheet/60825.xlsx b/testcases/test-data/spreadsheet/60825.xlsx
new file mode 100644
index 000000000..ffcfe08e6
Binary files /dev/null and b/testcases/test-data/spreadsheet/60825.xlsx differ
diff --git a/testcases/test-data/spreadsheet/61034.xlsx b/testcases/test-data/spreadsheet/61034.xlsx
new file mode 100644
index 000000000..cd2c5e564
Binary files /dev/null and b/testcases/test-data/spreadsheet/61034.xlsx differ