diff --git a/Directory.Packages.props b/Directory.Packages.props index 9e20d6f26..c6bc5d70b 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -14,6 +14,7 @@ + @@ -26,4 +27,4 @@ - + \ No newline at end of file diff --git a/main/HSSF/Extractor/ExcelExtractor.cs b/main/HSSF/Extractor/ExcelExtractor.cs index 9e9b92097..2f3cf5d2f 100644 --- a/main/HSSF/Extractor/ExcelExtractor.cs +++ b/main/HSSF/Extractor/ExcelExtractor.cs @@ -42,7 +42,7 @@ public class ExcelExtractor : POIOLE2TextExtractor, IExcelExtractor private bool formulasNotResults = false; private bool includeCellComments = false; private bool includeBlankCells = false; - private bool includeHeaderFooter = true; + private bool includeHeadersFooters = true; /// /// Initializes a new instance of the class. /// @@ -65,13 +65,13 @@ public ExcelExtractor(POIFSFileSystem fs) /// /// Should header and footer be included? Default is true /// - public bool IncludeHeaderFooter + public bool IncludeHeadersFooters { get { - return this.includeHeaderFooter; + return this.includeHeadersFooters; } set { - this.includeHeaderFooter = value; + this.includeHeadersFooters = value; } } /// @@ -137,6 +137,8 @@ public bool IncludeBlankCells } } + public bool AddTabEachEmptyCell { get; set; } + /// /// Retreives the text contents of the file /// @@ -168,7 +170,7 @@ public override String Text } // Header text, if there is any - if (sheet.Header != null && includeHeaderFooter) + if (sheet.Header != null && includeHeadersFooters) { text.Append( ExtractHeaderFooter(sheet.Header) @@ -289,7 +291,7 @@ public override String Text } // Finally Feader text, if there is any - if (sheet.Footer != null && includeHeaderFooter) + if (sheet.Footer != null && includeHeadersFooters) { text.Append( ExtractHeaderFooter(sheet.Footer) diff --git a/main/SS/Extractor/ExcelExtractor.cs b/main/SS/Extractor/ExcelExtractor.cs index 9cfb8ec40..91c298abd 100644 --- a/main/SS/Extractor/ExcelExtractor.cs +++ 
b/main/SS/Extractor/ExcelExtractor.cs @@ -27,7 +27,9 @@ public interface IExcelExtractor bool IncludeCellComments { get; set; } bool IncludeSheetNames { get; set; } bool FormulasNotResults { get; set; } - bool IncludeHeaderFooter { get; set; } + bool IncludeHeadersFooters { get; set; } + //Add a tab delimiter for each empty cell. + bool AddTabEachEmptyCell { get; set; } /** * Retreives the text contents of the file */ diff --git a/ooxml/NPOI.OOXML.Core.csproj b/ooxml/NPOI.OOXML.Core.csproj index 08eca7f48..fb7a63b6a 100644 --- a/ooxml/NPOI.OOXML.Core.csproj +++ b/ooxml/NPOI.OOXML.Core.csproj @@ -13,7 +13,6 @@ - @@ -26,6 +25,7 @@ + diff --git a/ooxml/POIXMLTextExtractor.cs b/ooxml/POIXMLTextExtractor.cs index ec85b072f..ecfbcaf78 100644 --- a/ooxml/POIXMLTextExtractor.cs +++ b/ooxml/POIXMLTextExtractor.cs @@ -39,21 +39,21 @@ public POIXMLTextExtractor(POIXMLDocument document) /** * Returns the core document properties */ - public CoreProperties GetCoreProperties() + public virtual CoreProperties GetCoreProperties() { return _document.GetProperties().CoreProperties; } /** * Returns the extended document properties */ - public ExtendedProperties GetExtendedProperties() + public virtual ExtendedProperties GetExtendedProperties() { return _document.GetProperties().ExtendedProperties; } /** * Returns the custom document properties */ - public CustomProperties GetCustomProperties() + public virtual CustomProperties GetCustomProperties() { return _document.GetProperties().CustomProperties; } diff --git a/ooxml/SS/Converter/ExcelToHtmlConverter.cs b/ooxml/SS/Converter/ExcelToHtmlConverter.cs index e262b11f3..ed259c4a2 100644 --- a/ooxml/SS/Converter/ExcelToHtmlConverter.cs +++ b/ooxml/SS/Converter/ExcelToHtmlConverter.cs @@ -811,7 +811,7 @@ private static void BuildStyle_Border(IWorkbook workbook, StringBuilder style, S var stylesSource = ((XSSFWorkbook) workbook).GetStylesSource(); if (stylesSource != null) { - var theme = stylesSource.GetTheme(); + var theme = 
stylesSource.Theme; if (theme != null) color = theme.GetThemeColor(borderColor); } @@ -853,9 +853,9 @@ private static void BuildStyle_Font(IWorkbook workbook, StringBuilder style, IFo { StylesTable st = ((XSSFWorkbook)workbook).GetStylesSource(); XSSFColor fontColor = null; - if (st != null && st.GetTheme() != null) + if (st != null && st.Theme != null) { - fontColor = st.GetTheme().GetThemeColor(font.Color); + fontColor = st.Theme.GetThemeColor(font.Color); } else { diff --git a/ooxml/XSSF/EventUserModel/ReadOnlySharedStringsTable.cs b/ooxml/XSSF/EventUserModel/ReadOnlySharedStringsTable.cs new file mode 100644 index 000000000..66e13e8e3 --- /dev/null +++ b/ooxml/XSSF/EventUserModel/ReadOnlySharedStringsTable.cs @@ -0,0 +1,308 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +namespace NPOI.XSSF.EventUserModel +{ + using NPOI.OpenXml4Net.OPC; + using NPOI.XSSF.UserModel; + using System; + using System.Collections.Generic; + using System.IO; + using System.Text; + using NSAX; + using NSAX.Helpers; + + /// + /// + /// + /// + /// This is a lightweight way to process the Shared Strings + /// table. 
Most of the text cells will reference something + /// from in here. + /// + /// + /// Note that each SI entry can have multiple T elements, if the + /// string is made up of bits with different formatting. + /// + /// + /// Example input: + /// + /// <?xml version="1.0" encoding="UTF-8" standalone="yes" ?> + /// <sst xmlns="http://schemas.Openxmlformats.org/spreadsheetml/2006/main" count="2" uniqueCount="2"> + /// <si> + /// <r> + /// <rPr> + /// <b /> + /// <sz val="11" /> + /// <color theme="1" /> + /// <rFont val="Calibri" /> + /// <family val="2" /> + /// <scheme val="minor" /> + /// </rPr> + /// <t>This:</t> + /// </r> + /// <r> + /// <rPr> + /// <sz val="11" /> + /// <color theme="1" /> + /// <rFont val="Calibri" /> + /// <family val="2" /> + /// <scheme val="minor" /> + /// </rPr> + /// <t xml:space="preserve">Causes Problems</t> + /// </r> + /// </si> + /// <si> + /// <t>This does not</t> + /// </si> + /// </sst> + /// + /// + /// + public class ReadOnlySharedStringsTable : DefaultHandler + { + + private bool includePhoneticRuns; + /// + /// An integer representing the total count of strings in the workbook. This count does not + /// include any numbers, it counts only the total of text strings in the workbook. + /// + private int count; + + /// + /// An integer representing the total count of unique strings in the Shared String Table. + /// A string is unique even if it is a copy of another string, but has different formatting applied + /// at the character level. + /// + private int uniqueCount; + + /// + /// The shared strings table. + /// + private List strings; + + /// + /// Map of phonetic strings (if they exist) indexed + /// with the integer matching the index in strings + /// + private Dictionary phoneticStrings; + + /// + /// Calls with + /// a value of true for including phonetic runs + /// + /// The to use as basis for the shared-strings table. + /// If reading the data from the package fails. + /// if parsing the XML data fails. 
public ReadOnlySharedStringsTable(OPCPackage pkg)
    : this(pkg, true)
{
}

/// <summary>
/// Builds the table from the first shared-strings part found in the package.
/// When the package has no such part, nothing is read.
/// </summary>
/// <param name="pkg">The <see cref="OPCPackage"/> to use as basis for the shared-strings table.</param>
/// <param name="includePhoneticRuns">whether or not to concatenate phoneticRuns onto the shared string</param>
/// <exception cref="IOException">If reading the data from the package fails.</exception>
/// <exception cref="SAXException">if parsing the XML data fails.</exception>
/// <remarks>@since POI 3.14-Beta3</remarks>
public ReadOnlySharedStringsTable(OPCPackage pkg, bool includePhoneticRuns)
{
    this.includePhoneticRuns = includePhoneticRuns;
    var parts = pkg.GetPartsByContentType(XSSFRelation.SHARED_STRINGS.ContentType);

    // Some workbooks have no shared strings table.
    if (parts.Count > 0)
    {
        PackagePart sstPart = parts[0];
        // The part stream is opened here, so it is released here once parsing is done.
        using (Stream sstStream = sstPart.GetInputStream())
        {
            ReadFrom(sstStream);
        }
    }
}

/// <summary>
/// <para>
/// Like POIXMLDocumentPart constructor
/// </para>
/// <para>
/// Calls <see cref="ReadOnlySharedStringsTable(PackagePart, bool)"/>, with a
/// value of <c>true</c> to include phonetic runs.
/// </para>
/// </summary>
/// <remarks>@since POI 3.14-Beta1</remarks>
public ReadOnlySharedStringsTable(PackagePart part)
    : this(part, true)
{
}

/// <summary>
/// Like POIXMLDocumentPart constructor
/// </summary>
/// <param name="part">The package part whose input stream is parsed as the shared-strings XML.</param>
/// <param name="includePhoneticRuns">whether or not to concatenate phoneticRuns onto the shared string</param>
/// <remarks>@since POI 3.14-Beta3</remarks>
public ReadOnlySharedStringsTable(PackagePart part, bool includePhoneticRuns)
{
    this.includePhoneticRuns = includePhoneticRuns;
    // The part stream is opened here, so it is released here once parsing is done.
    using (Stream partStream = part.GetInputStream())
    {
        ReadFrom(partStream);
    }
}

/// <summary>
/// Read this shared strings table from an XML file.
/// </summary>
/// <param name="is1">The input stream containing the XML document.</param>
/// <exception cref="IOException">if an error occurs while reading.</exception>
/// <exception cref="SAXException">if parsing the XML data fails.</exception>
/// <remarks>
/// A stream with no remaining bytes is skipped without invoking the parser.
/// Streams that cannot seek are copied into memory first so that their
/// emptiness can be tested.
/// </remarks>
/// <exception cref="ArgumentNullException">if <paramref name="is1"/> is null.</exception>
public void ReadFrom(Stream is1)
{
    if (is1 == null)
    {
        throw new ArgumentNullException(nameof(is1));
    }

    Stream input = is1;
    if (!input.CanSeek)
    {
        // Stream.Length is not supported on forward-only streams, so buffer
        // the content; every string ends up in memory after parsing anyway.
        MemoryStream buffered = new MemoryStream();
        input.CopyTo(buffered);
        buffered.Position = 0;
        input = buffered;
    }

    // test if the file is empty, otherwise parse it
    if (input.Length - input.Position <= 0)
    {
        return;
    }

    InputSource sheetSource = new InputSource(input);
    NSAX.AElfred.SAXDriver sheetParser = new NSAX.AElfred.SAXDriver();
    sheetParser.ContentHandler = this;
    sheetParser.Parse(sheetSource);
}

/// <summary>
/// Return an integer representing the total count of strings in the workbook. This count does not
/// include any numbers, it counts only the total of text strings in the workbook.
/// </summary>
/// <value>the total count of strings in the workbook</value>
public int Count => count;

/// <summary>
/// Returns an integer representing the total count of unique strings in the Shared String Table.
/// A string is unique even if it is a copy of another string, but has different formatting applied
/// at the character level.
/// </summary>
/// <value>the total count of unique strings in the workbook</value>
public int UniqueCount => uniqueCount;

/// <summary>
/// Return the string at a given index.
/// Formatting is ignored.
/// </summary>
/// <param name="idx">index of item to return.</param>
/// <returns>the item at the specified position in this Shared String table.</returns>
public String GetEntryAt(int idx)
{
    return strings[idx];
}

/// <summary>
/// The list of strings collected so far, in document order.
/// </summary>
public List<String> Items => strings;


//// ContentHandler methods ////

// Upper bound for the list capacity reserved up front. The uniqueCount
// attribute comes from the file, so it must not drive an unbounded allocation.
private const int MaxInitialCapacity = 100000;

// Text gathered for the <si> entry currently being read.
private StringBuilder characters;
// True between the start and end of a <t> element.
private bool tIsOpen;
// True between the start and end of an <rPh> (phonetic run) element.
private bool inRPh;

/// <summary>
/// Tracks the opening of sst, si, t and rPh elements in the spreadsheet namespace.
/// Elements reported with a different, non-null namespace URI are ignored.
/// </summary>
/// <param name="uri">Namespace URI reported by the parser; may be null.</param>
/// <param name="localName">Local element name.</param>
/// <param name="name">Qualified element name (unused).</param>
/// <param name="attributes">Attributes of the element; count/uniqueCount are read from sst.</param>
public override void StartElement(String uri, String localName, String name,
                                  IAttributes attributes)
{
    if (uri != null && !uri.Equals(XSSFRelation.NS_SPREADSHEETML))
    {
        return;
    }

    switch (localName)
    {
        case "sst":
        {
            // These are machine-written integers, so parse them culture-independently.
            String countAttr = attributes.GetValue("count");
            if (countAttr != null)
            {
                this.count = Int32.Parse(countAttr, System.Globalization.CultureInfo.InvariantCulture);
            }
            String uniqueCountAttr = attributes.GetValue("uniqueCount");
            if (uniqueCountAttr != null)
            {
                this.uniqueCount = Int32.Parse(uniqueCountAttr, System.Globalization.CultureInfo.InvariantCulture);
            }

            // Clamp the declared size: a negative value would make the List
            // constructor throw, and a bogus huge value would reserve a huge
            // array before a single string has been read.
            int capacity = Math.Max(0, Math.Min(this.uniqueCount, MaxInitialCapacity));
            this.strings = new List<String>(capacity);
            this.phoneticStrings = new Dictionary<int, String>();
            characters = new StringBuilder();
            break;
        }
        case "si":
            // A new entry starts: drop the text of the previous one.
            characters.Length = 0;
            break;
        case "t":
            tIsOpen = true;
            break;
        case "rPh":
            inRPh = true;
            //append space...this assumes that rPh always comes After regular
            if (includePhoneticRuns && characters.Length > 0)
            {
                characters.Append(" ");
            }
            break;
    }
}

/// <summary>
/// Tracks the closing of si, t and rPh elements in the spreadsheet namespace;
/// closing an si element stores the gathered text as the next entry.
/// </summary>
/// <param name="uri">Namespace URI reported by the parser; may be null.</param>
/// <param name="localName">Local element name.</param>
/// <param name="name">Qualified element name (unused).</param>
public override void EndElement(String uri, String localName, String name)
{
    if (uri != null && !uri.Equals(XSSFRelation.NS_SPREADSHEETML))
    {
        return;
    }

    switch (localName)
    {
        case "si":
            strings.Add(characters.ToString());
            break;
        case "t":
            tIsOpen = false;
            break;
        case "rPh":
            inRPh = false;
            break;
    }
}

/// <summary>
/// Captures characters only if a t(ext) element is open.
+ /// + public override void Characters(char[] ch, int start, int length) + { + if (tIsOpen) + { + if (inRPh && includePhoneticRuns) + { + characters.Append(ch, start, length); + } + else if (!inRPh) + { + characters.Append(ch, start, length); + } + } + } + } +} + diff --git a/ooxml/XSSF/EventUserModel/XSSFReader.cs b/ooxml/XSSF/EventUserModel/XSSFReader.cs new file mode 100644 index 000000000..cc27d17fd --- /dev/null +++ b/ooxml/XSSF/EventUserModel/XSSFReader.cs @@ -0,0 +1,502 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +using System; +using System.Collections; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace NPOI.XSSF.EventUserModel +{ + + using NPOI; + using NPOI.OpenXml4Net.Exceptions; + using NPOI.OpenXml4Net.OPC; + using NPOI.Util; + using NPOI.XSSF.Model; + using NPOI.XSSF.UserModel; + using NSAX; + using NSAX.Helpers; + using System.Xml; + + /// + /// This class makes it easy to Get at individual parts + /// of an OOXML .xlsx file, suitable for low memory sax + /// parsing or similar. 
+ /// It makes up the core part of the EventUserModel support + /// for XSSF. + /// + public class XSSFReader + { + + private static ISet WORKSHEET_RELS = + new HashSet( + Arrays.AsList(new String[]{ + XSSFRelation.WORKSHEET.Relation, + XSSFRelation.CHARTSHEET.Relation, + }) + ); + //private static POILogger LOGGER = POILogFactory.GetLogger(XSSFReader.class); + + protected OPCPackage pkg; + protected PackagePart workbookPart; + + /// + /// Creates a new XSSFReader, for the given package + /// + public XSSFReader(OPCPackage pkg) + { + + this.pkg = pkg; + + PackageRelationship coreDocRelationship = this.pkg.GetRelationshipsByType( + PackageRelationshipTypes.CORE_DOCUMENT).GetRelationship(0); + + // strict OOXML likely not fully supported, see #57699 + // this code is similar to POIXMLDocumentPart.PartFromOPCPackage, but I could not combine it + // easily due to different return values + if (coreDocRelationship == null) + { + if (this.pkg.GetRelationshipsByType( + PackageRelationshipTypes.STRICT_CORE_DOCUMENT).GetRelationship(0) != null) + { + throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699"); + } + + throw new POIXMLException("OOXML file structure broken/invalid - no core document found!"); + } + + // Get the part that holds the workbook + workbookPart = this.pkg.GetPart(coreDocRelationship); + } + + + /// + /// Opens up the Shared Strings Table, parses it, and + /// returns a handy object for working with + /// shared strings. + /// + public SharedStringsTable SharedStringsTable + { + get + { + List parts = pkg.GetPartsByContentType(XSSFRelation.SHARED_STRINGS.ContentType); + return parts.Count == 0 ? 
null : new SharedStringsTable(parts[0]); + } + } + + /// + /// Opens up the Styles Table, parses it, and + /// returns a handy object for working with cell styles + /// + public StylesTable StylesTable + { + get + { + List parts = pkg.GetPartsByContentType(XSSFRelation.STYLES.ContentType); + if(parts.Count == 0) + return null; + + // Create the Styles Table, and associate the Themes if present + StylesTable styles = new StylesTable(parts[0]); + parts = pkg.GetPartsByContentType(XSSFRelation.THEME.ContentType); + if(parts.Count != 0) + { + styles.Theme = (new ThemesTable(parts[0])); + } + return styles; + } + + } + + + /// + /// Returns an InputStream to read the contents of the + /// shared strings table. + /// + public Stream SharedStringsData => XSSFRelation.SHARED_STRINGS.GetContents(workbookPart); + + /// + /// Returns an InputStream to read the contents of the + /// styles table. + /// + public Stream StylesData => XSSFRelation.STYLES.GetContents(workbookPart); + + /// + /// Returns an InputStream to read the contents of the + /// themes table. + /// + public Stream ThemesData => XSSFRelation.THEME.GetContents(workbookPart); + + /// + /// Returns an InputStream to read the contents of the + /// main Workbook, which contains key overall data for + /// the file, including sheet definitions. + /// + public Stream WorkbookData => workbookPart.GetInputStream(); + + /// + /// Returns an InputStream to read the contents of the + /// specified Sheet. 
+ /// + /// The relationId of the sheet, from a r:id on the workbook + public Stream GetSheet(String relId) + { + + PackageRelationship rel = workbookPart.GetRelationship(relId); + if (rel == null) + { + throw new ArgumentException("No Sheet found with r:id " + relId); + } + + PackagePartName relName = PackagingUriHelper.CreatePartName(rel.TargetUri); + PackagePart sheet = pkg.GetPart(relName); + if (sheet == null) + { + throw new ArgumentException("No data found for Sheet with r:id " + relId); + } + return sheet.GetInputStream(); + } + + /// + /// Returns an Iterator which will let you Get at all the + /// different Sheets in turn. + /// Each sheet's InputStream is only opened when fetched + /// from the Iterator. It's up to you to close the + /// InputStreams when done with each one. + /// + public IEnumerator GetSheetsData() + { + + return new SheetIterator(workbookPart); + } + + /// + /// Iterator over sheet data. + /// + public class SheetIterator : IEnumerator + { + + /// + /// Maps relId and the corresponding PackagePart + /// + private Dictionary sheetMap; + + /// + /// Current sheet reference + /// + XSSFSheetRef xssfSheetRef; + + /// + /// Iterator over CTSheet objects, returns sheets in logical order. + /// We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order, + /// i.e. as they are stored in the underlying package + /// + IEnumerator sheetIterator; + + + /// + /// Construct a new SheetIterator + /// + /// package part holding workbook.xml + internal SheetIterator(PackagePart wb) + { + /* + * The order of sheets is defined by the order of CTSheet elements in workbook.xml + */ + try + { + //step 1. 
Map sheet's relationship Id and the corresponding PackagePart + sheetMap = new Dictionary(); + OPCPackage pkg = wb.Package; + ISet worksheetRels = SheetRelationships; + foreach (PackageRelationship rel in wb.Relationships) + { + String relType = rel.RelationshipType; + if (worksheetRels.Contains(relType)) + { + PackagePartName relName = PackagingUriHelper.CreatePartName(rel.TargetUri); + sheetMap.Add(rel.Id, pkg.GetPart(relName)); + } + } + //step 2. Read array of CTSheet elements, wrap it in a LinkedList + //and construct an iterator + sheetIterator = CreateSheetIteratorFromWB(wb).GetEnumerator(); + } + catch (InvalidFormatException e) + { + throw new POIXMLException(e); + } + } + + static List CreateSheetIteratorFromWB(PackagePart wb) + { + XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader(); + NSAX.AElfred.SAXDriver xmlReader; + try + { + xmlReader = new NSAX.AElfred.SAXDriver();// SAXHelper.newXMLReader(); + } + //catch (ParserConfigurationException e) + //{ + // throw new POIXMLException(e); + //} + catch (SAXException e) + { + throw new POIXMLException(e); + } + xmlReader.ContentHandler = (xmlSheetRefReader); + try + { + xmlReader.Parse(new InputSource(wb.GetInputStream())); + } + catch (SAXException e) + { + throw new POIXMLException(e); + } + + List validSheets = new List(); + foreach (XSSFSheetRef xssfSheetRef in xmlSheetRefReader.GetSheetRefs()) + { + //if there's no relationship id, silently skip the sheet + String sheetId = xssfSheetRef.Id; + if (sheetId != null && sheetId.Length > 0) + { + validSheets.Add(xssfSheetRef); + } + } + return validSheets; + } + + /// + /// Gets string representations of relationships + /// that are sheet-like. Added to allow subclassing + /// by XSSFBReader. This is used to decide what + /// relationships to load into the sheetRefs + /// + /// all relationships that are sheet-like + static ISet SheetRelationships => WORKSHEET_RELS; + + /// + /// Returns true if the iteration has more elements. 
+ /// + /// true if the iterator has more elements. + //public bool HasNext() + //{ + // return sheetIterator.HasNext(); + //} + + /// + /// Returns input stream of the next sheet in the iteration + /// + /// input stream of the next sheet in the iteration + private Stream Next() + { + xssfSheetRef = sheetIterator.Current; + + String sheetId = xssfSheetRef.Id; + try + { + PackagePart sheetPkg = sheetMap[sheetId]; + return sheetPkg.GetInputStream(); + } + catch (IOException e) + { + throw new POIXMLException(e); + } + } + + public Stream Current => Next(); + + object IEnumerator.Current => Next(); + public bool MoveNext() + { + return sheetIterator.MoveNext(); + } + + public void Reset() + { + sheetIterator.Reset(); + } + + public void Dispose() + { + sheetIterator.Dispose(); + } + + /// + /// Returns name of the current sheet + /// + /// name of the current sheet + public String SheetName => xssfSheetRef.Name; + + /// + /// Returns the comments associated with this sheet, + /// or null if there aren't any + /// + public CommentsTable SheetComments + { + get + { + PackagePart sheetPkg = SheetPart; + + // Do we have a comments relationship? (Only ever one if so) + try + { + PackageRelationshipCollection commentsList = + sheetPkg.GetRelationshipsByType(XSSFRelation.SHEET_COMMENTS.Relation); + if(commentsList.Size > 0) + { + PackageRelationship comments = commentsList.GetRelationship(0); + PackagePartName commentsName = PackagingUriHelper.CreatePartName(comments.TargetUri); + PackagePart commentsPart = sheetPkg.Package.GetPart(commentsName); + return new CommentsTable(commentsPart); + } + } + catch(InvalidFormatException) + { + return null; + } + catch(IOException) + { + return null; + } + return null; + } + } + + /// + /// Returns the shapes associated with this sheet, + /// an empty list or null if there is an exception + /// + public List Shapes + { + get + { + PackagePart sheetPkg = SheetPart; + List shapes = new List(); + // Do we have a comments relationship? 
(Only ever one if so) + try + { + PackageRelationshipCollection drawingsList = sheetPkg.GetRelationshipsByType(XSSFRelation.DRAWINGS.Relation); + for(int i = 0; i < drawingsList.Size; i++) + { + PackageRelationship drawings = drawingsList.GetRelationship(i); + PackagePartName drawingsName = PackagingUriHelper.CreatePartName(drawings.TargetUri); + PackagePart drawingsPart = sheetPkg.Package.GetPart(drawingsName); + if(drawingsPart == null) + { + //parts can go missing; Excel ignores them silently -- TIKA-2134 + //LOGGER.log(POILogger.WARN, "Missing Drawing: " + drawingsName + ". Skipping it."); + continue; + } + XSSFDrawing drawing = new XSSFDrawing(drawingsPart); + shapes.AddRange(drawing.GetShapes()); + } + } + catch(XmlException) + { + return null; + } + catch(InvalidFormatException) + { + return null; + } + catch(IOException) + { + return null; + } + return shapes; + } + } + + public PackagePart SheetPart => sheetMap[xssfSheetRef.Id]; + + /// + /// We're read only, so remove isn't supported + /// + public void Remove() + { + throw new InvalidOperationException("Not supported"); + } + + + } + + public sealed class XSSFSheetRef + { + //do we need to store sheetId, too? 
+ private String id; + private String name; + + public XSSFSheetRef(String id, String name) + { + this.id = id; + this.name = name; + } + + public String Id => id; + + public String Name => name; + } + + //scrapes sheet reference info and order from workbook.xml + private class XMLSheetRefReader : DefaultHandler + { + private static String SHEET = "sheet"; + private static String ID = "id"; + private static String NAME = "name"; + + private List sheetRefs = new List(); + + // read + // and add XSSFSheetRef(id="rId6", name="Sheet6") to sheetRefs + public override void StartElement(String uri, String localName, String qName, IAttributes attrs) + { + + if (localName.Equals(SHEET, StringComparison.OrdinalIgnoreCase)) + { + String name = null; + String id = null; + for (int i = 0; i < attrs.Length; i++) + { + String attrName = attrs.GetLocalName(i); + if (attrName.Equals(NAME, StringComparison.OrdinalIgnoreCase)) + { + name = attrs.GetValue(i); + } + else if (attrName.Equals(ID, StringComparison.OrdinalIgnoreCase)) + { + id = attrs.GetValue(i); + } + if (name != null && id != null) + { + sheetRefs.Add(new XSSFSheetRef(id, name)); + break; + } + } + } + } + + public List GetSheetRefs() + { + return sheetRefs; + } + } + } +} diff --git a/ooxml/XSSF/EventUserModel/XSSFSheetXMLHandler.cs b/ooxml/XSSF/EventUserModel/XSSFSheetXMLHandler.cs new file mode 100644 index 000000000..71c7a04d9 --- /dev/null +++ b/ooxml/XSSF/EventUserModel/XSSFSheetXMLHandler.cs @@ -0,0 +1,611 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +using System; +using System.Collections; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace NPOI.XSSF.EventUserModel +{ + using static NPOI.XSSF.UserModel.XSSFRelation; + + + using NPOI.SS.UserModel; + using NPOI.SS.Util; + using NPOI.Util; + using NPOI.XSSF.Model; + using NPOI.XSSF.UserModel; + using NSAX.Helpers; + using NPOI.OpenXmlFormats.Spreadsheet; + using NSAX; + + /// + /// This class handles the processing of a sheet#.xml + /// sheet part of a XSSF .xlsx file, and generates + /// row and cell events for it. + /// + public class XSSFSheetXMLHandler : DefaultHandler + { + //private static POILogger logger = POILogFactory.GetLogger(XSSFSheetXMLHandler.class); + + /// + /// These are the different kinds of cells we support. + /// We keep track of the current one between + /// the start and end. 
+ /// + enum XSSFDataType + { + Boolean, + Error, + Formula, + InlineString, + SSTString, + Number, + } + + /// + /// Table with the styles used for formatting + /// + private StylesTable stylesTable; + + /// + /// Table with cell comments + /// + private CommentsTable commentsTable; + + /// + /// Read only access to the shared strings table, for looking + /// up (most) string cell's contents + /// + private ReadOnlySharedStringsTable sharedStringsTable; + + /// + /// Where our text is going + /// + private SheetContentsHandler output; + + // Set when V start element is seen + private bool vIsOpen; + // Set when F start element is seen + private bool fIsOpen; + // Set when an Inline String "is" is seen + private bool isIsOpen; + // Set when a header/footer element is seen + private bool hfIsOpen; + + // Set when cell start element is seen; + // used when cell close element is seen. + private XSSFDataType nextDataType; + + // Used to format numeric cell values. + private short formatIndex; + private String formatString; + private DataFormatter formatter; + private int rowNum; + private int nextRowNum; // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well + private String cellRef; + private bool formulasNotResults; + + // Gathers characters as they are seen. + private StringBuilder value = new StringBuilder(); + private StringBuilder formula = new StringBuilder(); + private StringBuilder headerFooter = new StringBuilder(); + + private Queue commentCellRefs; + + /// + /// Accepts objects needed while parsing. 
+        ///
+        /// Table of styles
+        /// Table of shared strings
+        public XSSFSheetXMLHandler(
+                StylesTable styles,
+                CommentsTable comments,
+                ReadOnlySharedStringsTable strings,
+                SheetContentsHandler sheetContentsHandler,
+                DataFormatter dataFormatter,
+                bool formulasNotResults)
+        {
+            this.stylesTable = styles;
+            this.commentsTable = comments;
+            this.sharedStringsTable = strings;
+            this.output = sheetContentsHandler;
+            this.formulasNotResults = formulasNotResults;
+            this.nextDataType = XSSFDataType.Number;
+            this.formatter = dataFormatter;
+            Init();
+        }
+
+        /// <summary>
+        /// Accepts objects needed while parsing. No comments table is used,
+        /// so no cell comments are reported to the handler.
+        /// </summary>
+        /// <param name="styles">Table of styles</param>
+        /// <param name="strings">Table of shared strings</param>
+        /// <param name="sheetContentsHandler">Receiver of the row, cell and header/footer callbacks</param>
+        /// <param name="dataFormatter">Formatter applied to numeric cell values</param>
+        /// <param name="formulasNotResults">When true, formula cells report the formula text instead of the cached value</param>
+        public XSSFSheetXMLHandler(
+                StylesTable styles,
+                ReadOnlySharedStringsTable strings,
+                SheetContentsHandler sheetContentsHandler,
+                DataFormatter dataFormatter,
+                bool formulasNotResults)
+            : this(styles, null, strings, sheetContentsHandler, dataFormatter, formulasNotResults)
+        {
+
+        }
+
+        /// <summary>
+        /// Accepts objects needed while parsing. Uses a freshly constructed
+        /// <see cref="DataFormatter"/> and no comments table.
+        /// </summary>
+        /// <param name="styles">Table of styles</param>
+        /// <param name="strings">Table of shared strings</param>
+        /// <param name="sheetContentsHandler">Receiver of the row, cell and header/footer callbacks</param>
+        /// <param name="formulasNotResults">When true, formula cells report the formula text instead of the cached value</param>
+        public XSSFSheetXMLHandler(
+                StylesTable styles,
+                ReadOnlySharedStringsTable strings,
+                SheetContentsHandler sheetContentsHandler,
+                bool formulasNotResults)
+            : this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults)
+        {
+
+        }
+
+        /// <summary>
+        /// When a comments table was supplied, queues the address of every
+        /// comment so that comments on otherwise empty cells can be emitted
+        /// while the sheet is parsed.
+        /// </summary>
+        private void Init()
+        {
+            if(commentsTable != null)
+            {
+                commentCellRefs = new Queue<CellAddress>();
+                //noinspection deprecation
+                foreach(CT_Comment comment in commentsTable.GetCTComments().commentList.GetCommentArray())
+                {
+                    commentCellRefs.Enqueue(new CellAddress(comment.@ref));
+                }
+            }
+        }
+
+        /// <summary>
+        /// Tells whether the named element carries cell text that must be
+        /// accumulated into the value buffer.
+        /// </summary>
+        /// <param name="name">Local name of the element</param>
+        /// <returns>true for "v", "inlineStr", and "t" while an "is" element is open</returns>
+        private bool IsTextTag(String name)
+        {
+            if("v".Equals(name))
+            {
+                // Easy, normal v text tag
+                return true;
+            }
+            if("inlineStr".Equals(name))
+            {
+                // Easy inline string
+                return true;
+            }
+            if("t".Equals(name) && isIsOpen)
+            {
+                // Inline string <is><t>...</t></is> pair
+                return true;
+            }
+            // It isn't a text tag
+            return false;
+        }
+
+        /// <summary>
+        /// Handles the start of an element: opens the matching text, formula or
+        /// header/footer buffer, announces new rows to the handler, and for a
+        /// cell ("c") works out the data type and number format to apply when
+        /// its value is closed. Elements from a non-SpreadsheetML namespace are ignored.
+        /// </summary>
+        public override void StartElement(String uri, String localName, String qName,
+                                 IAttributes attributes)
+        {
+            if(uri != null && !uri.Equals(NS_SPREADSHEETML))
+            {
+                return;
+            }
+
+            if(IsTextTag(localName))
+            {
+                vIsOpen = true;
+                // Clear contents cache
+                value.Length = 0;
+            }
+            else if("is".Equals(localName))
+            {
+                // Inline string outer tag
+                isIsOpen = true;
+            }
+            else if("f".Equals(localName))
+            {
+                // Clear contents cache
+                formula.Length = 0;
+
+                // Mark us as being a formula if not already
+                if(nextDataType == XSSFDataType.Number)
+                {
+                    nextDataType = XSSFDataType.Formula;
+                }
+
+                // Decide where to Get the formula string from
+                String type = attributes.GetValue("t");
+                if(type != null && type.Equals("shared"))
+                {
+                    // Is it the one that defines the shared, or uses it?
+                    String ref1 = attributes.GetValue("ref");
+                    String si = attributes.GetValue("si");
+
+                    if(ref1 != null)
+                    {
+                        // This one defines it
+                        // TODO Save it somewhere
+                        fIsOpen = true;
+                    }
+                    else
+                    {
+                        // This one uses a shared formula
+                        // TODO Retrieve the shared formula and tweak it to
+                        //  match the current cell
+                        if(formulasNotResults)
+                        {
+                            //logger.log(POILogger.WARN, "shared formulas not yet supported!");
+                        }
+                        /*else {
+                           // It's a shared formula, so we can't Get at the formula string yet
+                           // However, they don't care about the formula string, so that's ok!
+                        }*/
+                    }
+                }
+                else
+                {
+                    fIsOpen = true;
+                }
+            }
+            else if("oddHeader".Equals(localName) || "evenHeader".Equals(localName) ||
+                  "firstHeader".Equals(localName) || "firstFooter".Equals(localName) ||
+                  "oddFooter".Equals(localName) || "evenFooter".Equals(localName))
+            {
+                hfIsOpen = true;
+                // Clear contents cache
+                headerFooter.Length = 0;
+            }
+            else if("row".Equals(localName))
+            {
+                String rowNumStr = attributes.GetValue("r");
+                if(rowNumStr != null)
+                {
+                    // The file stores 1-based row numbers in invariant format;
+                    // never parse them with the current thread culture.
+                    rowNum = Int32.Parse(rowNumStr, System.Globalization.CultureInfo.InvariantCulture) - 1;
+                }
+                else
+                {
+                    rowNum = nextRowNum;
+                }
+                output.StartRow(rowNum);
+            }
+            // c => cell
+            else if("c".Equals(localName))
+            {
+                // Set up defaults.
+                this.nextDataType = XSSFDataType.Number;
+                this.formatIndex = -1;
+                this.formatString = null;
+                cellRef = attributes.GetValue("r");
+                String cellType = attributes.GetValue("t");
+                String cellStyleStr = attributes.GetValue("s");
+                if("b".Equals(cellType))
+                    nextDataType = XSSFDataType.Boolean;
+                else if("e".Equals(cellType))
+                    nextDataType = XSSFDataType.Error;
+                else if("inlineStr".Equals(cellType))
+                    nextDataType = XSSFDataType.InlineString;
+                else if("s".Equals(cellType))
+                    nextDataType = XSSFDataType.SSTString;
+                else if("str".Equals(cellType))
+                    nextDataType = XSSFDataType.Formula;
+                else
+                {
+                    // Number, but almost certainly with a special style or format
+                    XSSFCellStyle style = null;
+                    if(stylesTable != null)
+                    {
+                        if(cellStyleStr != null)
+                        {
+                            int styleIndex = int.Parse(cellStyleStr, System.Globalization.CultureInfo.InvariantCulture);
+                            style = stylesTable.GetStyleAt(styleIndex);
+                        }
+                        else if(stylesTable.NumCellStyles > 0)
+                        {
+                            style = stylesTable.GetStyleAt(0);
+                        }
+                    }
+                    if(style != null)
+                    {
+                        this.formatIndex = style.DataFormat;
+                        this.formatString = style.GetDataFormatString();
+                        if(this.formatString == null)
+                            this.formatString = BuiltinFormats.GetBuiltinFormat(this.formatIndex);
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Handles the end of an element. When a text element closes, the
+        /// buffered value is converted according to the cell's data type and
+        /// reported through <c>output.Cell</c>; the end of rows, of the sheet
+        /// data and of header/footer elements is forwarded to the handler too.
+        /// Elements from a non-SpreadsheetML namespace are ignored.
+        /// </summary>
+        public override void EndElement(String uri, String localName, String qName)
+        {
+            if(uri != null && !uri.Equals(NS_SPREADSHEETML))
+            {
+                return;
+            }
+
+            String thisStr = null;
+
+            // v => contents of a cell
+            if(IsTextTag(localName))
+            {
+                vIsOpen = false;
+
+                // Process the value contents as required, now we have it all
+                switch(nextDataType)
+                {
+                    case XSSFDataType.Boolean:
+                        char first = value[0];
+                        thisStr = first == '0' ? "FALSE" : "TRUE";
+                        break;
+
+                    case XSSFDataType.Error:
+                        thisStr = "ERROR:" + value;
+                        break;
+
+                    case XSSFDataType.Formula:
+                        if(formulasNotResults)
+                        {
+                            thisStr = formula.ToString();
+                        }
+                        else
+                        {
+                            String fv = value.ToString();
+
+                            if(this.formatString != null)
+                            {
+                                try
+                                {
+                                    // Try to use the value as a formattable number.
+                                    // Stored values use '.' as the decimal separator, so
+                                    // parse with the invariant culture, not the thread's.
+                                    double d = double.Parse(fv, System.Globalization.CultureInfo.InvariantCulture);
+                                    thisStr = formatter.FormatRawCellContents(d, this.formatIndex, this.formatString);
+                                }
+                                catch(FormatException)
+                                {
+                                    // Formula is a String result not a Numeric one
+                                    thisStr = fv;
+                                }
+                            }
+                            else
+                            {
+                                // No formatting applied, just do raw value in all cases
+                                thisStr = fv;
+                            }
+                        }
+                        break;
+
+                    case XSSFDataType.InlineString:
+                        // TODO: Can these ever have formatting on them?
+                        XSSFRichTextString rtsi = new XSSFRichTextString(value.ToString());
+                        thisStr = rtsi.ToString();
+                        break;
+
+                    case XSSFDataType.SSTString:
+                        String sstIndex = value.ToString();
+                        try
+                        {
+                            int idx = int.Parse(sstIndex, System.Globalization.CultureInfo.InvariantCulture);
+                            XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.GetEntryAt(idx));
+                            thisStr = rtss.ToString();
+                        }
+                        catch(FormatException)
+                        {
+                            // Deliberately best-effort: an unparsable index leaves the cell value null.
+                            //logger.log(POILogger.ERROR, "Failed to parse SST index '" + sstIndex, ex);
+                        }
+                        break;
+
+                    case XSSFDataType.Number:
+                        String n = value.ToString();
+                        if(this.formatString != null && n.Length > 0)
+                            // Invariant culture: the stored number always uses '.' as decimal separator
+                            thisStr = formatter.FormatRawCellContents(Double.Parse(n, System.Globalization.CultureInfo.InvariantCulture), this.formatIndex, this.formatString);
+                        else
+                            thisStr = n;
+                        break;
+
+                    default:
+                        thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
+                        break;
+                }
+
+                // Do we have a comment for this cell?
+                CheckForEmptyCellComments(EmptyCellCommentsCheckType.Cell);
+                XSSFComment comment = commentsTable != null ? commentsTable.FindCellComment(new CellAddress(cellRef)) : null;
+
+                // Output
+                output.Cell(cellRef, thisStr, comment);
+            }
+            else if("f".Equals(localName))
+            {
+                fIsOpen = false;
+            }
+            else if("is".Equals(localName))
+            {
+                isIsOpen = false;
+            }
+            else if("row".Equals(localName))
+            {
+                // Handle any "missing" cells which had comments attached
+                CheckForEmptyCellComments(EmptyCellCommentsCheckType.EndOfRow);
+
+                // Finish up the row
+                output.EndRow(rowNum);
+
+                // some sheets do not have rowNum Set in the XML, Excel can read them so we should try to read them as well
+                nextRowNum = rowNum + 1;
+            }
+            else if("sheetData".Equals(localName))
+            {
+                // Handle any "missing" cells which had comments attached
+                CheckForEmptyCellComments(EmptyCellCommentsCheckType.EndOfSheetData);
+            }
+            else if("oddHeader".Equals(localName) || "evenHeader".Equals(localName) ||
+                  "firstHeader".Equals(localName))
+            {
+                hfIsOpen = false;
+                output.HeaderFooter(headerFooter.ToString(), true, localName);
+            }
+            else if("oddFooter".Equals(localName) || "evenFooter".Equals(localName) ||
+                  "firstFooter".Equals(localName))
+            {
+                hfIsOpen = false;
+                output.HeaderFooter(headerFooter.ToString(), false, localName);
+            }
+        }
+
+        /// <summary>
+        /// Captures characters only if a suitable element is open.
+        /// Originally was just "v"; extended for inlineStr also.
+        /// The same characters are appended to every buffer (value, formula,
+        /// header/footer) whose element is currently open.
+        /// </summary>
+        public override void Characters(char[] ch, int start, int length)
+        {
+            if(vIsOpen)
+            {
+                value.Append(ch, start, length);
+            }
+            if(fIsOpen)
+            {
+                formula.Append(ch, start, length);
+            }
+            if(hfIsOpen)
+            {
+                headerFooter.Append(ch, start, length);
+            }
+        }
+
+        ///
+        /// Do a check for, and output, comments in otherwise empty cells.
+        ///
+        /// <param name="type">
+        /// Which parsing event triggered the check: a cell value closing,
+        /// the end of a row, or the end of the sheet data.
+        /// </param>
+        private void CheckForEmptyCellComments(EmptyCellCommentsCheckType type)
+        {
+            // Nothing to do when no comments were queued or all have been handled
+            if(commentCellRefs != null && commentCellRefs.Count>0)
+            {
+                // If we've reached the end of the sheet data, output any
+                //  comments we haven't yet already handled
+                if(type == EmptyCellCommentsCheckType.EndOfSheetData)
+                {
+                    while(commentCellRefs.Count>0)
+                    {
+                        OutputEmptyCellComment(commentCellRefs.Dequeue());
+                    }
+                    return;
+                }
+
+                // At the end of a row, handle any comments for "missing" rows before us
+                // (no cell reference has been recorded yet)
+                if(this.cellRef == null)
+                {
+                    if(type == EmptyCellCommentsCheckType.EndOfRow)
+                    {
+                        // Drain queued comments from the front for as long as they belong
+                        // to the current row; stop at the first one for a different row
+                        while(commentCellRefs.Count>0)
+                        {
+                            if(commentCellRefs.Peek().Row == rowNum)
+                            {
+                                OutputEmptyCellComment(commentCellRefs.Dequeue());
+                            }
+                            else
+                            {
+                                return;
+                            }
+                        }
+                        return;
+                    }
+                    else
+                    {
+                        // A Cell check with no cell reference recorded is treated as a broken state
+                        throw new InvalidOperationException("Cell ref should be null only if there are only empty cells in the row; rowNum: " + rowNum);
+                    }
+                }
+
+                // Compare the comment at the head of the queue with the most recent cell,
+                // repeating for as long as a comment was output and more remain
+                CellAddress nextCommentCellRef;
+                do
+                {
+                    CellAddress cellRef = new CellAddress(this.cellRef);
+                    CellAddress peekCellRef = commentCellRefs.Peek();
+                    if(type == EmptyCellCommentsCheckType.Cell && cellRef.Equals(peekCellRef))
+                    {
+                        // remove the comment cell ref from the list if we're about to handle it alongside the cell content
+                        commentCellRefs.Dequeue();
+                        return;
+                    }
+                    else
+                    {
+                        // fill in any gaps if there are empty cells with comment mixed in with non-empty cells
+                        int comparison = peekCellRef.CompareTo(cellRef);
+                        // End of row: the comment compares after the last cell seen and its row is not beyond the current row
+                        if(comparison > 0 && type == EmptyCellCommentsCheckType.EndOfRow && peekCellRef.Row <= rowNum)
+                        {
+                            nextCommentCellRef = commentCellRefs.Dequeue();
+                            OutputEmptyCellComment(nextCommentCellRef);
+                        }
+                        // Cell: the comment compares before the current cell, i.e. it sits on a cell that was skipped
+                        else if(comparison < 0 && type == EmptyCellCommentsCheckType.Cell && peekCellRef.Row <= rowNum)
+                        {
+                            nextCommentCellRef = commentCellRefs.Dequeue();
+                            OutputEmptyCellComment(nextCommentCellRef);
+                        }
+                        else
+                        {
+                            // Head of the queue is not due yet; this ends the loop
+                            nextCommentCellRef = null;
+                        }
+                    }
+                } while(nextCommentCellRef != null && commentCellRefs.Count>0);
+            }
+        }
+
+
+        ///
+        /// Output an
empty-cell comment.
+        /// The comment is looked up in the comments table and reported to the
+        /// handler as a cell with a null formatted value.
+        ///
+        private void OutputEmptyCellComment(CellAddress cellRef)
+        {
+            // commentsTable is dereferenced without a null check; the visible callers only
+            // reach this when the comment queue is non-empty, and Init() fills that queue
+            // only when a comments table was supplied
+            XSSFComment comment = commentsTable.FindCellComment(cellRef);
+            output.Cell(cellRef.FormatAsString(), null, comment);
+        }
+
+        // Identifies the parsing event that triggered CheckForEmptyCellComments
+        private enum EmptyCellCommentsCheckType
+        {
+            // A cell's text element has just closed
+            Cell,
+            // A "row" element has just closed
+            EndOfRow,
+            // The "sheetData" element has just closed
+            EndOfSheetData
+        }
+
+        ///
+        /// You need to implement this to handle the results
+        /// of the sheet parsing.
+        ///
+        public interface SheetContentsHandler
+        {
+            ///
+            /// A row with the (zero based) row number has started
+            ///
+            public void StartRow(int rowNum);
+            ///
+            /// A row with the (zero based) row number has ended
+            ///
+            public void EndRow(int rowNum);
+            ///
+            /// A cell, with the given formatted value (may be null),
+            /// and possibly a comment (may be null), was encountered
+            ///
+            public void Cell(String cellReference, String formattedValue, XSSFComment comment);
+            ///
+            /// A header or footer has been encountered.
+            /// IsHeader is true for header elements and false for footer elements;
+            /// tagName is the local name of the element (for example "oddHeader").
+            ///
+            public void HeaderFooter(String text, bool IsHeader, String tagName);
+        }
+    }
+}
+
diff --git a/ooxml/XSSF/Extractor/XSSFEventBasedExcelExtractor.cs b/ooxml/XSSF/Extractor/XSSFEventBasedExcelExtractor.cs
index e7009b2e5..c0db763ab 100644
--- a/ooxml/XSSF/Extractor/XSSFEventBasedExcelExtractor.cs
+++ b/ooxml/XSSF/Extractor/XSSFEventBasedExcelExtractor.cs
@@ -1,7 +1,7 @@
 /* ====================================================================
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
-   this work for Additional information regarding copyright ownership.
+   this work for additional information regarding copyright ownership.
    The ASF licenses this file to You under the Apache License, Version 2.0
    (the "License"); you may not use this file except in compliance with
    the License.  You may obtain a copy of the License at
@@ -14,110 +14,201 @@ the License.
You may obtain a copy of the License at See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ -using NPOI.OpenXml4Net.OPC; + using System; -using NPOI.SS.UserModel; -using NPOI.XSSF.Model; +using System.Collections; +using System.Collections.Generic; using System.IO; using System.Text; + namespace NPOI.XSSF.Extractor { + using NPOI; + using NPOI.OpenXml4Net.Exceptions; + using NPOI.OpenXml4Net.OPC; + using NPOI.SS.UserModel; + using NPOI.SS.Extractor; + using NPOI.Util; + using NPOI.XSSF.EventUserModel; + using NPOI.XSSF.Model; + using NPOI.XSSF.UserModel; + using System.Globalization; + using NSAX; + using NSAX.AElfred; + using static NPOI.XSSF.EventUserModel.XSSFSheetXMLHandler; + using NPOI.OpenXml4Net; - /** - * Implementation of a text extractor from OOXML Excel - * files that uses SAX event based parsing. - */ - public class XSSFEventBasedExcelExtractor : POIXMLTextExtractor + /// + /// Implementation of a text extractor from OOXML Excel + /// files that uses SAX event based parsing. 
+ /// + public class XSSFEventBasedExcelExtractor : POIXMLTextExtractor, IExcelExtractor { + + //private static POILogger LOGGER = POILogFactory.GetLogger(XSSFEventBasedExcelExtractor.class); + private OPCPackage container; private POIXMLProperties properties; - private Locale locale; + private CultureInfo locale; + private bool includeTextBoxes = true; private bool includeSheetNames = true; + private bool includeCellComments = false; + private bool includeHeadersFooters = true; private bool formulasNotResults = false; + private bool concatenatePhoneticRuns = true; public XSSFEventBasedExcelExtractor(String path) : this(OPCPackage.Open(path)) { - } - public XSSFEventBasedExcelExtractor(OPCPackage Container) + public XSSFEventBasedExcelExtractor(OPCPackage container) : base(null) { - this.container = Container; - properties = new POIXMLProperties(Container); + this.container = container; + + properties = new POIXMLProperties(container); + } + + public static void main(String[] args) + { + + if(args.Length < 1) + { + Console.WriteLine("Use:"); + Console.WriteLine(" XSSFEventBasedExcelExtractor "); + return; + } + var extractor = new XSSFEventBasedExcelExtractor(args[0]); + Console.WriteLine(extractor.Text); + extractor.Close(); } - /** - * Should sheet names be included? Default is true - */ - public void SetIncludeSheetNames(bool includeSheetNames) + /// + /// Get or Set should sheet names be included? Default is true + /// + public bool IncludeSheetNames { - this.includeSheetNames = includeSheetNames; + get + { + return includeSheetNames; + } + set + { + includeSheetNames = value; + } } - /** - * Should we return the formula itself, and not - * the result it produces? Default is false - */ - public void SetFormulasNotResults(bool formulasNotResults) + + + /// + /// Should we return the formula itself, and not + /// the result it produces? 
Default is false + /// + public bool FormulasNotResults { - this.formulasNotResults = formulasNotResults; + get { return formulasNotResults; } + set { formulasNotResults = value; } } - public void SetLocale(Locale locale) + /// + /// Should headers and footers be included? Default is true + /// + public bool IncludeHeadersFooters { - this.locale = locale; + get { return includeHeadersFooters; } + set { includeHeadersFooters = value; } } - /** - * Returns the opened OPCPackage Container. - */ - public OPCPackage GetPackage() + /// + /// Should text from textboxes be included? Default is true + /// + public bool IncludeTextBoxes { - return container; + get { return includeTextBoxes; } + set { includeTextBoxes = value; } + } + + /// + /// + /// whether cell comments should be included + /// + /// @since 3.16-beta3 + public bool IncludeCellComments + { + get { return includeCellComments; } + set { this.includeCellComments = value; } } - /** - * Returns the core document properties - */ + public bool AddTabEachEmptyCell { get; set; } = true; - public NPOI.POIXMLProperties.CoreProperties GetCoreProperties() + /// + /// Concatenate text from <rPh> text elements in SharedStringsTable + /// Default is true; + /// + /// concatenatePhoneticRuns + public void SetConcatenatePhoneticRuns(bool concatenatePhoneticRuns) { - return properties.GetCoreProperties(); + this.concatenatePhoneticRuns = concatenatePhoneticRuns; } - /** - * Returns the extended document properties - */ - public NPOI.POIXMLProperties.ExtendedProperties GetExtendedProperties() + /// CultureInfo + /// + public CultureInfo Locale + { + get { return locale; } + set { locale = value; } + } + /// + /// Returns the opened OPCPackage container. 
+ /// + public OPCPackage GetPackage() { - return properties.GetExtendedProperties(); + return container; } - /** - * Returns the custom document properties - */ - public NPOI.POIXMLProperties.CustomProperties GetCustomProperties() + /// + /// Returns the core document properties + /// + public override CoreProperties GetCoreProperties() + { + return properties.CoreProperties; + } + /// + /// Returns the extended document properties + /// + public override ExtendedProperties GetExtendedProperties() + { + return properties.ExtendedProperties; + } + /// + /// Returns the custom document properties + /// + public override CustomProperties GetCustomProperties() { - return properties.GetCustomProperties(); + return properties.CustomProperties; } - /** - * Processes the given sheet - */ + + + /// + /// Processes the given sheet + /// public void ProcessSheet( SheetContentsHandler sheetContentsExtractor, StylesTable styles, + CommentsTable comments, ReadOnlySharedStringsTable strings, - InputStream sheetInputStream) + Stream sheetInputStream) + { + DataFormatter formatter; - if (locale == null) + if(locale == null) { formatter = new DataFormatter(); } @@ -127,94 +218,138 @@ public void ProcessSheet( } InputSource sheetSource = new InputSource(sheetInputStream); - SAXParserFactory saxFactory = SAXParserFactory.newInstance(); try { - SAXParser saxParser = saxFactory.newSAXParser(); - XMLReader sheetParser = saxParser.GetXMLReader(); - ContentHandler handler = new XSSFSheetXMLHandler( - styles, strings, sheetContentsExtractor, formatter, formulasNotResults); - sheetParser.SetContentHandler(handler); + SAXDriver sheetParser = new SAXDriver(); + IContentHandler handler = new XSSFSheetXMLHandler( + styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults); + sheetParser.ContentHandler = (handler); sheetParser.Parse(sheetSource); } - catch (ParserConfigurationException e) + catch(SAXException e) { - throw new RuntimeException("SAX Parser appears to be 
broken - " + e.GetMessage()); + throw new RuntimeException("SAX parser appears to be broken - " + e.Message); } } - /** - * Processes the file and returns the text - */ - public String GetText() + /// + /// Processes the file and returns the text + /// + public override String Text { - try + get { - ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container); - XSSFReader xssfReader = new XSSFReader(container); - StylesTable styles = xssfReader.GetStylesTable(); - XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator)xssfReader.GetSheetsData(); + try + { + ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns); + XSSFReader xssfReader = new XSSFReader(container); + StylesTable styles = xssfReader.StylesTable; + XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.GetSheetsData(); - StringBuilder text = new StringBuilder(); - SheetTextExtractor sheetExtractor = new SheetTextExtractor(text); + StringBuilder text = new StringBuilder(); + SheetTextExtractor sheetExtractor = new SheetTextExtractor(this); - while (iter.HasNext()) - { - InputStream stream = iter.next(); - if (includeSheetNames) + while(iter.MoveNext()) { - text.Append(iter.GetSheetName()); - text.Append('\n'); + Stream stream = iter.Current; + if(includeSheetNames) + { + text.Append(iter.SheetName); + text.Append('\n'); + } + CommentsTable comments = includeCellComments ? 
iter.SheetComments : null; + ProcessSheet(sheetExtractor, styles, comments, strings, stream); + if(includeHeadersFooters) + { + sheetExtractor.AppendHeaderText(text); + } + sheetExtractor.AppendCellText(text); + if(includeTextBoxes) + { + ProcessShapes(iter.Shapes, text); + } + if(includeHeadersFooters) + { + sheetExtractor.AppendFooterText(text); + } + sheetExtractor.Reset(); + stream.Close(); } - ProcessSheet(sheetExtractor, styles, strings, stream); - stream.Close(); - } - return text.ToString(); + return text.ToString(); + } + catch(IOException) + { + //LOGGER.log(POILogger.WARN, e); + return null; + } + catch(SAXException) + { + //LOGGER.log(POILogger.WARN, se); + return null; + } + catch(OpenXml4NetException) + { + //LOGGER.log(POILogger.WARN, o4je); + return null; + } } - catch (IOException e) + + } + + static void ProcessShapes(List shapes, StringBuilder text) + { + if(shapes == null) { - System.err.println(e); - return null; + return; } - catch (OpenXML4NetException o4je) + foreach(XSSFShape shape in shapes) { - System.err.println(o4je); - return null; + if(shape is XSSFSimpleShape) + { + String sText = ((XSSFSimpleShape)shape).Text; + if(sText != null && sText.Length > 0) + { + text.Append(sText).Append('\n'); + } + } } } - public void Close() + public override void Close() { - if (container != null) + + if(container != null) { container.Close(); container = null; } - base.close(); + base.Close(); } + protected class SheetTextExtractor : SheetContentsHandler { - private StringBuilder output; - private bool firstCellOfRow = true; - - protected SheetTextExtractor(StringBuilder output) + private StringBuilder output; + private bool firstCellOfRow; + private Dictionary headerFooterMap; + private XSSFEventBasedExcelExtractor eb; + public SheetTextExtractor(XSSFEventBasedExcelExtractor eb) { - this.output = output; + this.eb = eb; + this.output = new StringBuilder(); + this.firstCellOfRow = true; + this.headerFooterMap = eb.IncludeHeadersFooters ? 
new Dictionary() : null; } - - public void startRow(int rowNum) + public void StartRow(int rowNum) { firstCellOfRow = true; } - - public void endRow() + public void EndRow(int rowNum) { output.Append('\n'); } - - public void cell(String cellRef, String formattedValue) + public void Cell(String cellRef, String formattedValue, XSSFComment comment) { - if (firstCellOfRow) + if(firstCellOfRow) { firstCellOfRow = false; } @@ -222,12 +357,118 @@ public void cell(String cellRef, String formattedValue) { output.Append('\t'); } - output.Append(formattedValue); + if(formattedValue != null) + { + eb.CheckMaxTextSize(output, formattedValue); + output.Append(formattedValue); + } + if(eb.IncludeCellComments && comment != null) + { + String commentText = comment.String.String.Replace('\n', ' '); + output.Append(formattedValue != null ? " Comment by " : "Comment by "); + eb.CheckMaxTextSize(output, commentText); + if(commentText.StartsWith(comment.Author + ": ")) + { + output.Append(commentText); + } + else + { + output.Append(comment.Author).Append(": ").Append(commentText); + } + } + } + public void HeaderFooter(String text, bool IsHeader, String tagName) + { + if(headerFooterMap != null) + { + headerFooterMap[tagName] = text; + } } - public void headerFooter(String text, bool IsHeader, String tagName) + /// + /// Append the text for the named header or footer if found. + /// + private void AppendHeaderFooterText(StringBuilder buffer, String name) { - // We don't include headers in the output yet, so ignore + String text = headerFooterMap.TryGetValue(name, out string value) ? 
value : null; + if(text != null && text.Length > 0) + { + // this is a naive way of handling the left, center, and right + // header and footer delimiters, but it seems to be as good as + // the method used by XSSFExcelExtractor + text = HandleHeaderFooterDelimiter(text, "&L"); + text = HandleHeaderFooterDelimiter(text, "&C"); + text = HandleHeaderFooterDelimiter(text, "&R"); + buffer.Append(text).Append('\n'); + } + } + /// + /// Remove the delimiter if its found at the beginning of the text, + /// or replace it with a tab if its in the middle. + /// + private static String HandleHeaderFooterDelimiter(String text, String delimiter) + { + int index = text.IndexOf(delimiter); + if(index == 0) + { + text = text.Substring(2); + } + else if(index > 0) + { + text = text.Substring(0, index) + "\t" + text.Substring(index + 2); + } + return text; + } + + + /// + /// Append the text for each header type in the same order + /// they are appended in XSSFExcelExtractor. + /// + /// + /// + public void AppendHeaderText(StringBuilder buffer) + { + AppendHeaderFooterText(buffer, "firstHeader"); + AppendHeaderFooterText(buffer, "oddHeader"); + AppendHeaderFooterText(buffer, "evenHeader"); + } + + /// + /// Append the text for each footer type in the same order + /// they are appended in XSSFExcelExtractor. + /// + /// + /// + public void AppendFooterText(StringBuilder buffer) + { + // append the text for each footer type in the same order + // they are appended in XSSFExcelExtractor + AppendHeaderFooterText(buffer, "firstFooter"); + AppendHeaderFooterText(buffer, "oddFooter"); + AppendHeaderFooterText(buffer, "evenFooter"); + } + + /// + /// Append the cell contents we have collected. + /// + public void AppendCellText(StringBuilder buffer) + { + eb.CheckMaxTextSize(buffer, output.ToString()); + buffer.Append(output); + } + + /// + /// Reset this SheetTextExtractor for the next sheet. 
+ /// + public void Reset() + { + output.Length = 0; + firstCellOfRow = true; + if(headerFooterMap != null) + { + headerFooterMap.Clear(); + } } } } diff --git a/ooxml/XSSF/Extractor/XSSFExcelExtractor.cs b/ooxml/XSSF/Extractor/XSSFExcelExtractor.cs index a33762061..cd626ea8d 100644 --- a/ooxml/XSSF/Extractor/XSSFExcelExtractor.cs +++ b/ooxml/XSSF/Extractor/XSSFExcelExtractor.cs @@ -21,6 +21,7 @@ limitations under the License. using NPOI.SS.UserModel; using System.Globalization; using System.Collections.Generic; +using NPOI.Util; namespace NPOI.XSSF.Extractor { @@ -30,10 +31,10 @@ namespace NPOI.XSSF.Extractor public class XSSFExcelExtractor : POIXMLTextExtractor, NPOI.SS.Extractor.IExcelExtractor { public static XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] { - XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK, - XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK, - XSSFRelation.MACROS_WORKBOOK - }; + XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK, + XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK, + XSSFRelation.MACROS_WORKBOOK + }; private readonly XSSFWorkbook workbook; private readonly DataFormatter dataFormatter; @@ -60,7 +61,7 @@ public XSSFExcelExtractor(XSSFWorkbook workbook) /// /// Should header and footer be included? Default is true /// - public bool IncludeHeaderFooter + public bool IncludeHeadersFooters { get { @@ -129,9 +130,12 @@ public bool IncludeTextBoxes includeTextBoxes = value; } } + public bool AddTabEachEmptyCell { get; set; } = true; /** * Should sheet names be included? Default is true */ + [Obsolete("use property IncludeSheetNames")] + [Removal(Version = "4.0")] public void SetIncludeSheetNames(bool includeSheetNames) { this.includeSheetNames = includeSheetNames; @@ -140,6 +144,8 @@ public void SetIncludeSheetNames(bool includeSheetNames) * Should we return the formula itself, and not * the result it produces? 
Default is false */ + [Obsolete("use property FormulasNotResults")] + [Removal(Version = "4.0")] public void SetFormulasNotResults(bool formulasNotResults) { this.formulasNotResults = formulasNotResults; @@ -147,6 +153,8 @@ public void SetFormulasNotResults(bool formulasNotResults) /** * Should cell comments be included? Default is false */ + [Obsolete("use property IncludeCellComments")] + [Removal(Version = "4.0")] public void SetIncludeCellComments(bool includeCellComments) { this.includeCellComments = includeCellComments; @@ -154,6 +162,8 @@ public void SetIncludeCellComments(bool includeCellComments) /** * Should headers and footers be included? Default is true */ + [Obsolete("use property IncludeHeadersFooters")] + [Removal(Version = "4.0")] public void SetIncludeHeadersFooters(bool includeHeadersFooters) { this.includeHeadersFooters = includeHeadersFooters; @@ -163,6 +173,8 @@ public void SetIncludeHeadersFooters(bool includeHeadersFooters) * Should text within textboxes be included? Default is true * @param includeTextBoxes */ + [Obsolete("use property IncludeTextBoxes")] + [Removal(Version = "4.0")] public void SetIncludeTextBoxes(bool includeTextBoxes) { this.includeTextBoxes = includeTextBoxes; @@ -223,13 +235,16 @@ public override string Text for (int j = 0; j < row.LastCellNum; j++) { // Add a tab delimiter for each empty cell. 
- if (!firsttime) + if(AddTabEachEmptyCell) { - text.Append("\t"); - } - else - { - firsttime = false; + if(!firsttime) + { + text.Append("\t"); + } + else + { + firsttime = false; + } } ICell cell = row.GetCell(j); diff --git a/ooxml/XSSF/Model/CommentsTable.cs b/ooxml/XSSF/Model/CommentsTable.cs index 7e545140a..763d359de 100644 --- a/ooxml/XSSF/Model/CommentsTable.cs +++ b/ooxml/XSSF/Model/CommentsTable.cs @@ -116,15 +116,9 @@ public void ReferenceUpdated(CellAddress oldReference, CT_Comment comment) } - public int GetNumberOfComments() - { - return comments.commentList.SizeOfCommentArray(); - } + public int NumberOfComments => comments.commentList.SizeOfCommentArray(); - public int GetNumberOfAuthors() - { - return comments.authors.SizeOfAuthorArray(); - } + public int NumberOfAuthors => comments.authors.SizeOfAuthorArray(); public String GetAuthor(long authorId) { diff --git a/ooxml/XSSF/Model/StylesTable.cs b/ooxml/XSSF/Model/StylesTable.cs index 8c8238739..72f123ea8 100644 --- a/ooxml/XSSF/Model/StylesTable.cs +++ b/ooxml/XSSF/Model/StylesTable.cs @@ -130,25 +130,26 @@ public void SetWorkbook(XSSFWorkbook wb) { this.workbook = wb; } - public ThemesTable GetTheme() + public ThemesTable Theme { - return theme; - } - - public void SetTheme(ThemesTable theme) - { - this.theme = theme; - - if (theme != null) theme.SetColorMap(indexedColors); - // Pass the themes table along to things which need to - // know about it, but have already been Created by now - foreach (XSSFFont font in fonts) + get { - font.SetThemesTable(theme); + return theme; } - foreach (XSSFCellBorder border in borders) + set { - border.SetThemesTable(theme); + this.theme = value; + + // Pass the themes table along to things which need to + // know about it, but have already been Created by now + foreach(XSSFFont font in fonts) + { + font.SetThemesTable(theme); + } + foreach(XSSFCellBorder border in borders) + { + border.SetThemesTable(theme); + } } } @@ -199,7 +200,7 @@ public void 
EnsureThemesTable() { if (theme != null) return; - SetTheme((ThemesTable)workbook.CreateRelationship(XSSFRelation.THEME, XSSFFactory.GetInstance())); + theme = (ThemesTable)workbook.CreateRelationship(XSSFRelation.THEME, XSSFFactory.GetInstance()); } /** * Read this shared styles table from an XML file. @@ -551,10 +552,7 @@ public ReadOnlyCollection GetFills() return fills.AsReadOnly(); } - public ReadOnlyCollection GetFonts() - { - return fonts.AsReadOnly(); - } + public ReadOnlyCollection Fonts => fonts.AsReadOnly(); public IDictionary GetNumberFormats() { diff --git a/ooxml/XSSF/UserModel/XSSFFont.cs b/ooxml/XSSF/UserModel/XSSFFont.cs index 720ce0c95..5e9eb65df 100644 --- a/ooxml/XSSF/UserModel/XSSFFont.cs +++ b/ooxml/XSSF/UserModel/XSSFFont.cs @@ -550,7 +550,7 @@ public override String ToString() // */ public long RegisterTo(StylesTable styles) { - this._themes = styles.GetTheme(); + this._themes = styles.Theme; short idx = (short)styles.PutFont(this, true); this._index = idx; return idx; diff --git a/ooxml/XSSF/UserModel/XSSFRichTextString.cs b/ooxml/XSSF/UserModel/XSSFRichTextString.cs index dd5f323c6..371de8dfe 100644 --- a/ooxml/XSSF/UserModel/XSSFRichTextString.cs +++ b/ooxml/XSSF/UserModel/XSSFRichTextString.cs @@ -756,7 +756,7 @@ private ThemesTable GetThemesTable() { if(styles == null) return null; - return styles.GetTheme(); + return styles.Theme; } } } diff --git a/ooxml/XSSF/UserModel/XSSFSheet.cs b/ooxml/XSSF/UserModel/XSSFSheet.cs index f4fc5f20e..37150c014 100644 --- a/ooxml/XSSF/UserModel/XSSFSheet.cs +++ b/ooxml/XSSF/UserModel/XSSFSheet.cs @@ -950,7 +950,7 @@ public bool HasComments return false; } - return sheetComments.GetNumberOfComments() > 0; + return sheetComments.NumberOfComments > 0; } } @@ -963,7 +963,7 @@ internal int NumberOfComments return 0; } - return sheetComments.GetNumberOfComments(); + return sheetComments.NumberOfComments; } } @@ -4138,7 +4138,7 @@ public void CopyTo(IWorkbook dest, string name, bool copyStyle, bool 
keepFormula StylesTable styles = ((XSSFWorkbook) dest).GetStylesSource(); if(copyStyle && Workbook.NumberOfFonts > 0) { - foreach(XSSFFont font in ((XSSFWorkbook) Workbook).GetStylesSource().GetFonts()) + foreach(XSSFFont font in ((XSSFWorkbook) Workbook).GetStylesSource().Fonts) { styles.PutFont(font); } diff --git a/ooxml/XSSF/UserModel/XSSFWorkbook.cs b/ooxml/XSSF/UserModel/XSSFWorkbook.cs index 60b280a5c..e17f431a2 100644 --- a/ooxml/XSSF/UserModel/XSSFWorkbook.cs +++ b/ooxml/XSSF/UserModel/XSSFWorkbook.cs @@ -366,7 +366,7 @@ internal override void OnDocumentRead() } } stylesSource.SetWorkbook(this); - stylesSource.SetTheme(theme); + stylesSource.Theme = theme; if (sharedStringSource == null) { @@ -1121,7 +1121,7 @@ public short NumberOfFonts { get { - return (short)stylesSource.GetFonts().Count; + return (short)stylesSource.Fonts.Count; } } @@ -1867,7 +1867,7 @@ public StylesTable GetStylesSource() public ThemesTable GetTheme() { if (stylesSource == null) return null; - return stylesSource.GetTheme(); + return stylesSource.Theme; } /** diff --git a/testcases/ooxml/NPOI.OOXML.TestCases.Core.csproj b/testcases/ooxml/NPOI.OOXML.TestCases.Core.csproj index 24573ba7c..bf948b49b 100644 --- a/testcases/ooxml/NPOI.OOXML.TestCases.Core.csproj +++ b/testcases/ooxml/NPOI.OOXML.TestCases.Core.csproj @@ -1,4 +1,4 @@ - + net472;net8.0 diff --git a/testcases/ooxml/XSSF/EventUserModel/TestReadOnlySharedStringsTable.cs b/testcases/ooxml/XSSF/EventUserModel/TestReadOnlySharedStringsTable.cs new file mode 100644 index 000000000..a0fcc1382 --- /dev/null +++ b/testcases/ooxml/XSSF/EventUserModel/TestReadOnlySharedStringsTable.cs @@ -0,0 +1,125 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + + +using System; +using System.Collections; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace TestCases.XSSF.EventUserModel +{ + + using System.Text.RegularExpressions; + using NPOI.OpenXml4Net.OPC; + using NPOI.XSSF.Model; + using NPOI.XSSF.UserModel; + using NPOI.XSSF.EventUserModel; + using NUnit.Framework; + using NPOI.OpenXmlFormats.Spreadsheet; + using NUnit.Framework.Legacy; + + /// + /// Tests for + /// + [TestFixture] + public sealed class TestReadOnlySharedStringsTable + { + private static POIDataSamples _ssTests = POIDataSamples.GetSpreadSheetInstance(); + + [Test] + public void TestParse() + { + + OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("SampleSS.xlsx")); + List parts = pkg.GetPartsByName(new Regex("/xl/sharedStrings.xml", RegexOptions.Compiled)); + ClassicAssert.AreEqual(1, parts.Count); + + SharedStringsTable stbl = new SharedStringsTable(parts[0]); + ReadOnlySharedStringsTable rtbl = new ReadOnlySharedStringsTable(parts[0]); + + ClassicAssert.AreEqual(stbl.Count, rtbl.Count); + ClassicAssert.AreEqual(stbl.UniqueCount, rtbl.UniqueCount); + + ClassicAssert.AreEqual(stbl.Items.Count, stbl.UniqueCount); + ClassicAssert.AreEqual(rtbl.Items.Count, rtbl.UniqueCount); + for(int i = 0; i < stbl.UniqueCount; i++) + { + CT_Rst i1 = 
stbl.GetEntryAt(i); + String i2 = rtbl.GetEntryAt(i); + ClassicAssert.AreEqual(i1.t, i2); + } + + } + + //51519 + [Test] + public void TestPhoneticRuns() + { + + OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("51519.xlsx")); + List parts = pkg.GetPartsByName(new Regex("/xl/sharedStrings.xml", RegexOptions.Compiled)); + ClassicAssert.AreEqual(1, parts.Count); + + ReadOnlySharedStringsTable rtbl = new ReadOnlySharedStringsTable(parts[0], true); + List strings = rtbl.Items; + ClassicAssert.AreEqual(49, strings.Count); + + ClassicAssert.AreEqual("\u30B3\u30E1\u30F3\u30C8", rtbl.GetEntryAt(0)); + ClassicAssert.AreEqual("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3", rtbl.GetEntryAt(3)); + + //now do not include phonetic runs + rtbl = new ReadOnlySharedStringsTable(parts[0], false); + strings = rtbl.Items; + ClassicAssert.AreEqual(49, strings.Count); + + ClassicAssert.AreEqual("\u30B3\u30E1\u30F3\u30C8", rtbl.GetEntryAt(0)); + ClassicAssert.AreEqual("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.GetEntryAt(3)); + + } + [Test] + public void TestEmptySSTOnPackageObtainedViaWorkbook() + { + + XSSFWorkbook wb = new XSSFWorkbook(_ssTests.OpenResourceAsStream("noSharedStringTable.xlsx")); + OPCPackage pkg = wb.Package; + assertEmptySST(pkg); + wb.Close(); + } + [Test] + public void TestEmptySSTOnPackageDirect() + { + + OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("noSharedStringTable.xlsx")); + assertEmptySST(pkg); + } + + private void assertEmptySST(OPCPackage pkg) + { + + ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(pkg); + ClassicAssert.AreEqual(0, sst.Count); + ClassicAssert.AreEqual(0, sst.UniqueCount); + ClassicAssert.IsNull(sst.Items); // same state it's left in if fed a package which has no SST part. 
+ } + + } +} + diff --git a/testcases/ooxml/XSSF/EventUserModel/TestXSSFReader.cs b/testcases/ooxml/XSSF/EventUserModel/TestXSSFReader.cs new file mode 100644 index 000000000..6149c4c94 --- /dev/null +++ b/testcases/ooxml/XSSF/EventUserModel/TestXSSFReader.cs @@ -0,0 +1,353 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + + +using System; +using System.Collections; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace TestCases.XSSF.EventUserModel +{ + using NPOI; + using NPOI.OpenXml4Net.OPC; + using NPOI.Util; + using NPOI.XSSF; + using NPOI.XSSF.EventUserModel; + using NPOI.XSSF.Model; + using NPOI.XSSF.UserModel; + using NUnit.Framework; + using NUnit.Framework.Legacy; + + + /// + /// Tests for + /// + [TestFixture] + public sealed class TestXSSFReader + { + private static POIDataSamples _ssTests = POIDataSamples.GetSpreadSheetInstance(); + + [Test] + public void TestGetBits() + { + + OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("SampleSS.xlsx")); + + XSSFReader r = new XSSFReader(pkg); + + ClassicAssert.IsNotNull(r.WorkbookData); + ClassicAssert.IsNotNull(r.SharedStringsData); + ClassicAssert.IsNotNull(r.StylesData); + + ClassicAssert.IsNotNull(r.SharedStringsTable); + ClassicAssert.IsNotNull(r.StylesTable); + } + + [Test] + public void TestStyles() + { + + OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("SampleSS.xlsx")); + + XSSFReader r = new XSSFReader(pkg); + + ClassicAssert.AreEqual(3, r.StylesTable.Fonts.Count); + ClassicAssert.AreEqual(0, r.StylesTable.NumDataFormats); + + // The Styles Table should have the themes associated with it too + ClassicAssert.IsNotNull(r.StylesTable.Theme); + + // Check we Get valid data for the two + ClassicAssert.IsNotNull(r.StylesData); + ClassicAssert.IsNotNull(r.ThemesData); + } + + [Test] + public void TestStrings() + { + + OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("SampleSS.xlsx")); + + XSSFReader r = new XSSFReader(pkg); + + ClassicAssert.AreEqual(11, r.SharedStringsTable.Items.Count); + ClassicAssert.AreEqual("Test spreadsheet", new XSSFRichTextString(r.SharedStringsTable.GetEntryAt(0)).ToString()); + } + + [Test] + public void TestSheets() + { + + OPCPackage pkg = 
OPCPackage.Open(_ssTests.OpenResourceAsStream("SampleSS.xlsx")); + + XSSFReader r = new XSSFReader(pkg); + byte[] data = new byte[4096]; + + // By r:id + ClassicAssert.IsNotNull(r.GetSheet("rId2")); + int read = IOUtils.ReadFully(r.GetSheet("rId2"), data); + ClassicAssert.AreEqual(974, read); + + // All + IEnumerator it = r.GetSheetsData(); + + int count = 0; + while(it.MoveNext()) + { + count++; + Stream inp = it.Current; + ClassicAssert.IsNotNull(inp); + read = IOUtils.ReadFully(inp, data); + inp.Close(); + + ClassicAssert.IsTrue(read > 400); + ClassicAssert.IsTrue(read < 1500); + } + ClassicAssert.AreEqual(3, count); + } + + /// + /// Check that the sheet iterator returns sheets in the logical order + /// (as they are defined in the workbook.xml) + /// + [Test] + public void TestOrderOfSheets() + { + + OPCPackage pkg = OPCPackage.Open(_ssTests.OpenResourceAsStream("reordered_sheets.xlsx")); + + XSSFReader r = new XSSFReader(pkg); + + String[] sheetNames = {"Sheet4", "Sheet2", "Sheet3", "Sheet1"}; + XSSFReader.SheetIterator it = (XSSFReader.SheetIterator)r.GetSheetsData(); + + int count = 0; + while(it.MoveNext()) + { + Stream inp = it.Current; + ClassicAssert.IsNotNull(inp); + inp.Close(); + + ClassicAssert.AreEqual(sheetNames[count], it.SheetName); + count++; + } + ClassicAssert.AreEqual(4, count); + } + [Test] + public void TestComments() + { + + OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("comments.xlsx"); + XSSFReader r = new XSSFReader(pkg); + XSSFReader.SheetIterator it = (XSSFReader.SheetIterator)r.GetSheetsData(); + + int count = 0; + while(it.MoveNext()) + { + count++; + Stream inp = it.Current; + inp.Close(); + + if(count == 1) + { + ClassicAssert.IsNotNull(it.SheetComments); + CommentsTable ct = it.SheetComments; + ClassicAssert.AreEqual(1, ct.NumberOfAuthors); + ClassicAssert.AreEqual(3, ct.NumberOfComments); + } + else + { + ClassicAssert.IsNull(it.SheetComments); + } + } + ClassicAssert.AreEqual(3, count); + } + + /// + /// Iterating 
over a workbook with chart sheets in it, using the + /// XSSFReader method + /// + /// Exception + [Test] + public void Test50119() + { + + OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("WithChartSheet.xlsx"); + XSSFReader r = new XSSFReader(pkg); + XSSFReader.SheetIterator it = (XSSFReader.SheetIterator)r.GetSheetsData(); + + while(it.MoveNext()) + { + Stream stream = it.Current; + stream.Close(); + } + } + + /// + /// Test text extraction from text box using GetShapes() + /// + /// Exception + [Test] + public void TestShapes() + { + + OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("WithTextBox.xlsx"); + XSSFReader r = new XSSFReader(pkg); + XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) r.GetSheetsData(); + + String text = GetShapesString(it); + StringAssert.Contains("Line 1", text); + StringAssert.Contains("Line 2", text); + StringAssert.Contains("Line 3", text); + } + + private String GetShapesString(XSSFReader.SheetIterator it) + { + StringBuilder sb = new StringBuilder(); + while(it.MoveNext()) + { + var _ = it.Current; + List shapes = it.Shapes; + if(shapes != null) + { + foreach(XSSFShape shape in shapes) + { + if(shape is XSSFSimpleShape) + { + String t = ((XSSFSimpleShape) shape).Text; + sb.Append(t).Append('\n'); + } + } + } + } + return sb.ToString(); + } + [Test] + public void TestBug57914() + { + + OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("57914.xlsx"); + XSSFReader r; + + // for now expect this to Assert.Fail, when we fix 57699, this one should Assert.Fail so we know we should adjust + // this test as well + try + { + r = new XSSFReader(pkg); + Assert.Fail("This will Assert.Fail until bug 57699 is fixed"); + } + catch(POIXMLException e) + { + StringAssert.Contains("57699", e.Message); + return; + } + + XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) r.GetSheetsData(); + + String text = GetShapesString(it); + StringAssert.Contains("Line 1", text); + StringAssert.Contains("Line 2", text); + 
StringAssert.Contains("Line 3", text); + } + + /// + /// NPE from XSSFReader$SheetIterator. on XLSX files generated by + /// the openpyxl library + /// + [Test] + public void Test58747() + { + + OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("58747.xlsx"); + ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg); + ClassicAssert.IsNotNull(strings); + XSSFReader reader = new XSSFReader(pkg); + StylesTable styles = reader.StylesTable; + ClassicAssert.IsNotNull(styles); + + XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) reader.GetSheetsData(); + ClassicAssert.AreEqual(true, iter.MoveNext()); + var _ = iter.Current; + + ClassicAssert.AreEqual(false, iter.MoveNext()); + ClassicAssert.AreEqual("Orders", iter.SheetName); + + pkg.Close(); + } + + /// + /// NPE when sheet has no relationship id in the workbook + /// 60825 + /// + [Test] + public void TestSheetWithNoRelationshipId() + { + + OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("60825.xlsx"); + ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg); + ClassicAssert.IsNotNull(strings); + XSSFReader reader = new XSSFReader(pkg); + StylesTable styles = reader.StylesTable; + ClassicAssert.IsNotNull(styles); + + XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) reader.GetSheetsData(); + iter.MoveNext(); + ClassicAssert.IsNotNull(iter.Current); + ClassicAssert.IsFalse(iter.MoveNext()); + + pkg.Close(); + } + + /// + /// + /// bug 61304: Call to XSSFReader.SheetsData returns duplicate sheets. 
+ /// + /// + /// The problem seems to be caused only by those xlsx files which have a specific + /// order of the attributes inside the <sheet> tag of workbook.xml + /// + /// + /// Example (which causes the problems): + /// <sheet name="Sheet6" r:id="rId6" sheetId="4"/> + /// + /// + /// While this one works correctly: + /// <sheet name="Sheet6" sheetId="4" r:id="rId6"/> + /// + /// + [Test] + public void Test61034() + { + OPCPackage pkg = XSSFTestDataSamples.OpenSamplePackage("61034.xlsx"); + XSSFReader reader = new XSSFReader(pkg); + XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) reader.GetSheetsData(); + ISet seen = new HashSet(); + while(iter.MoveNext()) + { + Stream stream = iter.Current; + String sheetName = iter.SheetName; + CollectionAssert.DoesNotContain(seen, sheetName); + seen.Add(sheetName); + stream.Close(); + } + pkg.Close(); + } + } +} + diff --git a/testcases/ooxml/XSSF/Extractor/TestXSSFEventBasedExcelExtractor.cs b/testcases/ooxml/XSSF/Extractor/TestXSSFEventBasedExcelExtractor.cs new file mode 100644 index 000000000..76f2e8082 --- /dev/null +++ b/testcases/ooxml/XSSF/Extractor/TestXSSFEventBasedExcelExtractor.cs @@ -0,0 +1,435 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +using System; +using System.Collections; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace TestCases.XSSF.Extractor +{ + + using NPOI; + using NPOI.HSSF; + using NPOI.HSSF.Extractor; + using NPOI.XSSF; + using NPOI.XSSF.Extractor; + using NUnit.Framework; + using NUnit.Framework.Legacy; + using System.Text.RegularExpressions; + using TestCases.HSSF; + + /// + /// Tests for + /// + [TestFixture] + public class TestXSSFEventBasedExcelExtractor + { + protected XSSFEventBasedExcelExtractor GetExtractor(String sampleName) + { + + return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples.OpenSamplePackage(sampleName)); + } + + /// + /// Get text out of the simple file + /// + [Test] + public void TestGetSimpleText() + { + + // a very simple file + XSSFEventBasedExcelExtractor extractor = GetExtractor("sample.xlsx"); + var _ = extractor.Text; + + String text = extractor.Text; + ClassicAssert.IsTrue(text.Length > 0); + + // Check sheet names + POITestCase.AssertStartsWith(text, "Sheet1"); + POITestCase.AssertEndsWith(text, "Sheet3\n"); + + // Now without, will have text + extractor.IncludeSheetNames = (false); + text = extractor.Text; + String CHUNK1 = + "Lorem\t111\n" + + "ipsum\t222\n" + + "dolor\t333\n" + + "sit\t444\n" + + "amet\t555\n" + + "consectetuer\t666\n" + + "adipiscing\t777\n" + + "elit\t888\n" + + "Nunc\t999\n"; + String CHUNK2 = + "The quick brown fox jumps over the lazy dog\n" + + "hello, xssf hello, xssf\n" + + "hello, xssf hello, xssf\n" + + "hello, xssf hello, xssf\n" + + "hello, xssf hello, xssf\n"; + ClassicAssert.AreEqual( + CHUNK1 + + "at\t4995\n" + + CHUNK2 + , text); + + // Now Get formulas not their values + extractor.FormulasNotResults = (true); + text = extractor.Text; + ClassicAssert.AreEqual( + CHUNK1 + + "at\tSUM(B1:B9)\n" + + 
CHUNK2, text); + + // With sheet names too + extractor.IncludeSheetNames = (true); + text = extractor.Text; + ClassicAssert.AreEqual( + "Sheet1\n" + + CHUNK1 + + "at\tSUM(B1:B9)\n" + + "rich test\n" + + CHUNK2 + + "Sheet3\n" + , text); + + extractor.Close(); + } + + [Test] + public void TestGetComplexText() + { + + // A fairly complex file + XSSFEventBasedExcelExtractor extractor = GetExtractor("AverageTaxRates.xlsx"); + var _ = extractor.Text; + + String text = extractor.Text; + ClassicAssert.IsTrue(text.Length > 0); + + // Might not have all formatting it should do! + POITestCase.AssertStartsWith(text, + "Avgtxfull\n" + + "(iii) AVERAGE TAX RATES ON ANNUAL" + ); + + extractor.Close(); + } + + [Test] + public void TestInlineStrings() + { + + XSSFEventBasedExcelExtractor extractor = GetExtractor("InlineStrings.xlsx"); + extractor.FormulasNotResults = (true); + String text = extractor.Text; + + // Numbers + POITestCase.AssertContains(text, "43"); + POITestCase.AssertContains(text, "22"); + + // Strings + POITestCase.AssertContains(text, "ABCDE"); + POITestCase.AssertContains(text, "Long Text"); + + // Inline Strings + POITestCase.AssertContains(text, "1st Inline String"); + POITestCase.AssertContains(text, "And More"); + + // Formulas + POITestCase.AssertContains(text, "A2"); + POITestCase.AssertContains(text, "A5-A$2"); + + extractor.Close(); + } + + /// + /// Test that we return pretty much the same as + /// ExcelExtractor does, when we're both passed + /// the same file, just saved as xls and xlsx + /// + [Test] + public void TestComparedToOLE2() + { + + // A fairly simple file - ooxml + XSSFEventBasedExcelExtractor ooxmlExtractor = GetExtractor("SampleSS.xlsx"); + + ExcelExtractor ole2Extractor = + new ExcelExtractor(HSSFTestDataSamples.OpenSampleWorkbook("SampleSS.xls")); + + POITextExtractor[] extractors = + new POITextExtractor[] { ooxmlExtractor, ole2Extractor }; + foreach(POITextExtractor extractor in extractors) + { + String text = 
extractor.Text.Replace("\r", "").Replace("\t", ""); + POITestCase.AssertStartsWith(text, "First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"); + Regex pattern = new Regex(".*13(\\.0+)?\\s+Sheet3.*", RegexOptions.Compiled | RegexOptions.Singleline); + Match m = pattern.Match(text); + ClassicAssert.IsTrue(m.Success); + } + + ole2Extractor.Close(); + ooxmlExtractor.Close(); + } + + /// + /// Test text extraction from text box using GetShapes() + /// + /// Exception + [Test] + public void TestShapes() + { + XSSFEventBasedExcelExtractor ooxmlExtractor = GetExtractor("WithTextBox.xlsx"); + try + { + String text = ooxmlExtractor.Text; + StringAssert.Contains("Line 1", text); + StringAssert.Contains("Line 2", text); + StringAssert.Contains("Line 3", text); + } + finally + { + ooxmlExtractor.Close(); + } + } + + /// + /// Test that we return the same output for unstyled numbers as the + /// non-event-based XSSFExcelExtractor. + /// + [Test] + public void TestUnstyledNumbersComparedToNonEventBasedExtractor() + { + String expectedOutput = "Sheet1\n99.99\n"; + XSSFExcelExtractor extractor = new XSSFExcelExtractor( + XSSFTestDataSamples.OpenSampleWorkbook("56011.xlsx")); + try + { + ClassicAssert.AreEqual(expectedOutput, extractor.Text.Replace(",", ".")); + } + finally + { + extractor.Close(); + } + + XSSFEventBasedExcelExtractor fixture = + new XSSFEventBasedExcelExtractor( + XSSFTestDataSamples.OpenSamplePackage("56011.xlsx")); + try + { + ClassicAssert.AreEqual(expectedOutput, fixture.Text.Replace(",", ".")); + } + finally + { + fixture.Close(); + } + } + + /// + /// Test that we return the same output headers and footers as the + /// non-event-based XSSFExcelExtractor. 
+ /// + [Test] + public void TestHeadersAndFootersComparedToNonEventBasedExtractor() + { + String expectedOutputWithHeadersAndFooters = + "Sheet1\n" + + "&\"Calibri,Regular\"&K000000top left\t&\"Calibri,Regular\"&K000000top center\t&\"Calibri,Regular\"&K000000top right\n" + + "abc\t123\n" + + "&\"Calibri,Regular\"&K000000bottom left\t&\"Calibri,Regular\"&K000000bottom center\t&\"Calibri,Regular\"&K000000bottom right\n"; + + String expectedOutputWithoutHeadersAndFooters = + "Sheet1\n" + + "abc\t123\n"; + + XSSFExcelExtractor extractor = new XSSFExcelExtractor( + XSSFTestDataSamples.OpenSampleWorkbook("headerFooterTest.xlsx")); + try + { + ClassicAssert.AreEqual(expectedOutputWithHeadersAndFooters, extractor.Text); + extractor.IncludeHeadersFooters = (false); + ClassicAssert.AreEqual(expectedOutputWithoutHeadersAndFooters, extractor.Text); + } + finally + { + extractor.Close(); + } + + XSSFEventBasedExcelExtractor fixture = + new XSSFEventBasedExcelExtractor( + XSSFTestDataSamples.OpenSamplePackage("headerFooterTest.xlsx")); + try + { + ClassicAssert.AreEqual(expectedOutputWithHeadersAndFooters, fixture.Text); + fixture.IncludeHeadersFooters = (false); + ClassicAssert.AreEqual(expectedOutputWithoutHeadersAndFooters, fixture.Text); + } + finally + { + fixture.Close(); + } + } + + /// + /// + /// Test that XSSFEventBasedExcelExtractor outputs comments when specified. + /// The output will contain two improvements over the output from + /// XSSFExcelExtractor in that (1) comments from empty cells will be + /// outputted, and (2) the author will not be outputted twice. + /// + /// + /// This test will need to be modified if these improvements are ported to + /// XSSFExcelExtractor. 
+ /// + /// + [Test] + public void TestCommentsComparedToNonEventBasedExtractor() + { + String expectedOutputWithoutComments = + "Sheet1\n" + + "\n" + + "abc\n" + + "\n" + + "123\n" + + "\n" + + "\n" + + "\n"; + + String nonEventBasedExtractorOutputWithComments = + "Sheet1\n" + + "\n" + + "abc Comment by Shaun Kalley: Shaun Kalley: Comment A2\n" + + "\n" + + "123 Comment by Shaun Kalley: Shaun Kalley: Comment B4\n" + + "\n" + + "\n" + + "\n"; + + String eventBasedExtractorOutputWithComments = + "Sheet1\n" + + "Comment by Shaun Kalley: Comment A1\tComment by Shaun Kalley: Comment B1\n" + + "abc Comment by Shaun Kalley: Comment A2\tComment by Shaun Kalley: Comment B2\n" + + "Comment by Shaun Kalley: Comment A3\tComment by Shaun Kalley: Comment B3\n" + + "Comment by Shaun Kalley: Comment A4\t123 Comment by Shaun Kalley: Comment B4\n" + + "Comment by Shaun Kalley: Comment A5\tComment by Shaun Kalley: Comment B5\n" + + "Comment by Shaun Kalley: Comment A7\tComment by Shaun Kalley: Comment B7\n" + + "Comment by Shaun Kalley: Comment A8\tComment by Shaun Kalley: Comment B8\n"; + + XSSFExcelExtractor extractor = new XSSFExcelExtractor( + XSSFTestDataSamples.OpenSampleWorkbook("commentTest.xlsx")); + try + { + extractor.AddTabEachEmptyCell = false; + ClassicAssert.AreEqual(expectedOutputWithoutComments, extractor.Text); + extractor.IncludeCellComments = (true); + ClassicAssert.AreEqual(nonEventBasedExtractorOutputWithComments, extractor.Text); + } + finally + { + extractor.Close(); + } + + XSSFEventBasedExcelExtractor fixture = + new XSSFEventBasedExcelExtractor( + XSSFTestDataSamples.OpenSamplePackage("commentTest.xlsx")); + try + { + ClassicAssert.AreEqual(expectedOutputWithoutComments, fixture.Text); + fixture.IncludeCellComments = (true); + ClassicAssert.AreEqual(eventBasedExtractorOutputWithComments, fixture.Text); + } + finally + { + fixture.Close(); + } + } + + [Test] + public void TestFile56278_normal() + { + + // first with normal Text Extractor + 
POIXMLTextExtractor extractor = new XSSFExcelExtractor( + XSSFTestDataSamples.OpenSampleWorkbook("56278.xlsx")); + try + { + ClassicAssert.IsNotNull(extractor.Text); + } + finally + { + extractor.Close(); + } + } + + [Test] + public void TestFile56278_event() + { + + // then with event based one + POIXMLTextExtractor extractor = GetExtractor("56278.xlsx"); + try + { + ClassicAssert.IsNotNull(extractor.Text); + } + finally + { + extractor.Close(); + } + } + + [Test] + public void Test59021() + { + + XSSFEventBasedExcelExtractor ex = + new XSSFEventBasedExcelExtractor( + XSSFTestDataSamples.OpenSamplePackage("59021.xlsx")); + String text = ex.Text; + StringAssert.Contains("Abkhazia - Fixed", text); + StringAssert.Contains("10/02/2016", text); + ex.Close(); + } + + [Test] + public void Test51519() + { + + //default behavior: include phonetic runs + XSSFEventBasedExcelExtractor ex = + new XSSFEventBasedExcelExtractor( + XSSFTestDataSamples.OpenSamplePackage("51519.xlsx")); + String text = ex.Text; + StringAssert.Contains("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3", text); + ex.Close(); + + //now try turning them off + ex = new XSSFEventBasedExcelExtractor( + XSSFTestDataSamples.OpenSamplePackage("51519.xlsx")); + ex.SetConcatenatePhoneticRuns(false); + text = ex.Text; + ClassicAssert.IsFalse(text.Contains("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3"), + "should not be able to find appended phonetic run"); + ex.Close(); + + } + } +} + diff --git a/testcases/ooxml/XSSF/Extractor/TestXSSFExcelExtractor.cs b/testcases/ooxml/XSSF/Extractor/TestXSSFExcelExtractor.cs index 0c60d000d..23c580f5a 100644 --- a/testcases/ooxml/XSSF/Extractor/TestXSSFExcelExtractor.cs +++ b/testcases/ooxml/XSSF/Extractor/TestXSSFExcelExtractor.cs @@ -55,7 +55,7 @@ public void TestGetSimpleText() ClassicAssert.IsTrue(text.EndsWith("Sheet3\n")); // Now without, will have text - extractor.SetIncludeSheetNames(false); + extractor.IncludeSheetNames = false; text = 
extractor.Text; string CHUNK1 = "Lorem\t111\n" + @@ -80,7 +80,7 @@ public void TestGetSimpleText() , text); // Now Get formulas not their values - extractor.SetFormulasNotResults(true); + extractor.FormulasNotResults = true; text = extractor.Text; ClassicAssert.AreEqual( CHUNK1 + @@ -88,7 +88,7 @@ public void TestGetSimpleText() CHUNK2, text); // With sheet names too - extractor.SetIncludeSheetNames(true); + extractor.IncludeSheetNames = true; text = extractor.Text; ClassicAssert.AreEqual( "Sheet1\n" + @@ -184,7 +184,7 @@ public void TestComments() ClassicAssert.IsFalse(text.Contains("test phrase"), "Unable to find expected word in text\n" + text); // Turn on comment extraction, will then be - extractor.SetIncludeCellComments(true); + extractor.IncludeCellComments = true; text = extractor.Text; ClassicAssert.IsTrue(text.Contains("testdoc"), "Unable to find expected word in text\n" + text); ClassicAssert.IsTrue(text.Contains("test phrase"), "Unable to find expected word in text\n" + text); @@ -256,7 +256,7 @@ public void TestTextBoxes() XSSFExcelExtractor extractor = GetExtractor("WithTextBox.xlsx"); try { - extractor.SetFormulasNotResults(true); + extractor.FormulasNotResults = true; string text = extractor.Text; ClassicAssert.IsTrue(text.IndexOf("Line 1") > -1); ClassicAssert.IsTrue(text.IndexOf("Line 2") > -1); diff --git a/testcases/ooxml/XSSF/Model/TestCommentsTable.cs b/testcases/ooxml/XSSF/Model/TestCommentsTable.cs index bde3999bb..c8fcfd9d8 100644 --- a/testcases/ooxml/XSSF/Model/TestCommentsTable.cs +++ b/testcases/ooxml/XSSF/Model/TestCommentsTable.cs @@ -38,7 +38,7 @@ public class TestCommentsTable public void FindAuthor() { CommentsTable sheetComments = new CommentsTable(); - ClassicAssert.AreEqual(1, sheetComments.GetNumberOfAuthors()); + ClassicAssert.AreEqual(1, sheetComments.NumberOfAuthors); ClassicAssert.AreEqual(0, sheetComments.FindAuthor("")); ClassicAssert.AreEqual("", sheetComments.GetAuthor(0)); @@ -205,22 +205,22 @@ public void 
RemoveComment() ClassicAssert.AreSame(a1, sheetComments.GetCTComment(addrA1)); ClassicAssert.AreSame(a2, sheetComments.GetCTComment(addrA2)); ClassicAssert.AreSame(a3, sheetComments.GetCTComment(addrA3)); - ClassicAssert.AreEqual(3, sheetComments.GetNumberOfComments()); + ClassicAssert.AreEqual(3, sheetComments.NumberOfComments); ClassicAssert.IsTrue(sheetComments.RemoveComment(addrA1)); - ClassicAssert.AreEqual(2, sheetComments.GetNumberOfComments()); + ClassicAssert.AreEqual(2, sheetComments.NumberOfComments); ClassicAssert.IsNull(sheetComments.GetCTComment(addrA1)); ClassicAssert.AreSame(a2, sheetComments.GetCTComment(addrA2)); ClassicAssert.AreSame(a3, sheetComments.GetCTComment(addrA3)); ClassicAssert.IsTrue(sheetComments.RemoveComment(addrA2)); - ClassicAssert.AreEqual(1, sheetComments.GetNumberOfComments()); + ClassicAssert.AreEqual(1, sheetComments.NumberOfComments); ClassicAssert.IsNull(sheetComments.GetCTComment(addrA1)); ClassicAssert.IsNull(sheetComments.GetCTComment(addrA2)); ClassicAssert.AreSame(a3, sheetComments.GetCTComment(addrA3)); ClassicAssert.IsTrue(sheetComments.RemoveComment(addrA3)); - ClassicAssert.AreEqual(0, sheetComments.GetNumberOfComments()); + ClassicAssert.AreEqual(0, sheetComments.NumberOfComments); ClassicAssert.IsNull(sheetComments.GetCTComment(addrA1)); ClassicAssert.IsNull(sheetComments.GetCTComment(addrA2)); ClassicAssert.IsNull(sheetComments.GetCTComment(addrA3)); diff --git a/testcases/ooxml/XSSF/Model/TestStylesTable.cs b/testcases/ooxml/XSSF/Model/TestStylesTable.cs index 6e36cfa9b..87f5bbf1a 100644 --- a/testcases/ooxml/XSSF/Model/TestStylesTable.cs +++ b/testcases/ooxml/XSSF/Model/TestStylesTable.cs @@ -100,7 +100,7 @@ public void doTestExisting(StylesTable st) ClassicAssert.AreEqual(1, st.StyleXfsSize); ClassicAssert.AreEqual(8, st.NumDataFormats); - ClassicAssert.AreEqual(2, st.GetFonts().Count); + ClassicAssert.AreEqual(2, st.Fonts.Count); ClassicAssert.AreEqual(2, st.GetFills().Count); ClassicAssert.AreEqual(1, 
st.GetBorders().Count); diff --git a/testcases/ooxml/XSSF/Model/TestThemesTable.cs b/testcases/ooxml/XSSF/Model/TestThemesTable.cs index d18bd9f30..a089d1aa6 100644 --- a/testcases/ooxml/XSSF/Model/TestThemesTable.cs +++ b/testcases/ooxml/XSSF/Model/TestThemesTable.cs @@ -249,16 +249,16 @@ public void TestAddNew() ClassicAssert.AreEqual(null, wb.GetTheme()); StylesTable styles = wb.GetStylesSource(); - ClassicAssert.AreEqual(null, styles.GetTheme()); + ClassicAssert.AreEqual(null, styles.Theme); styles.EnsureThemesTable(); - ClassicAssert.IsNotNull(styles.GetTheme()); + ClassicAssert.IsNotNull(styles.Theme); ClassicAssert.IsNotNull(wb.GetTheme()); wb = XSSFTestDataSamples.WriteOutAndReadBack(wb) as XSSFWorkbook; styles = wb.GetStylesSource(); - ClassicAssert.IsNotNull(styles.GetTheme()); + ClassicAssert.IsNotNull(styles.Theme); ClassicAssert.IsNotNull(wb.GetTheme()); } } diff --git a/testcases/ooxml/XSSF/UserModel/TestXSSFBugs.cs b/testcases/ooxml/XSSF/UserModel/TestXSSFBugs.cs index 4bef56af4..77f454b42 100644 --- a/testcases/ooxml/XSSF/UserModel/TestXSSFBugs.cs +++ b/testcases/ooxml/XSSF/UserModel/TestXSSFBugs.cs @@ -1415,7 +1415,7 @@ public void Test51850() // Sheet 2 has comments ClassicAssert.IsNotNull(sh2.GetCommentsTable(false)); - ClassicAssert.AreEqual(1, sh2.GetCommentsTable(false).GetNumberOfComments()); + ClassicAssert.AreEqual(1, sh2.GetCommentsTable(false).NumberOfComments); // Sheet 1 doesn't (yet) ClassicAssert.IsNull(sh1.GetCommentsTable(false)); @@ -1464,10 +1464,10 @@ public void Test51850() // Check the comments ClassicAssert.IsNotNull(sh2.GetCommentsTable(false)); - ClassicAssert.AreEqual(1, sh2.GetCommentsTable(false).GetNumberOfComments()); + ClassicAssert.AreEqual(1, sh2.GetCommentsTable(false).NumberOfComments); ClassicAssert.IsNotNull(sh1.GetCommentsTable(false)); - ClassicAssert.AreEqual(2, sh1.GetCommentsTable(false).GetNumberOfComments()); + ClassicAssert.AreEqual(2, sh1.GetCommentsTable(false).NumberOfComments); wb2.Close(); } diff 
--git a/testcases/ooxml/XSSF/UserModel/TestXSSFComment.cs b/testcases/ooxml/XSSF/UserModel/TestXSSFComment.cs index 548648621..3c217cb68 100644 --- a/testcases/ooxml/XSSF/UserModel/TestXSSFComment.cs +++ b/testcases/ooxml/XSSF/UserModel/TestXSSFComment.cs @@ -53,7 +53,7 @@ public void Constructor() ClassicAssert.IsNotNull(sheetComments.GetCTComments().commentList); ClassicAssert.IsNotNull(sheetComments.GetCTComments().authors); ClassicAssert.AreEqual(1, sheetComments.GetCTComments().authors.SizeOfAuthorArray()); - ClassicAssert.AreEqual(1, sheetComments.GetNumberOfAuthors()); + ClassicAssert.AreEqual(1, sheetComments.NumberOfAuthors); CT_Comment ctComment = sheetComments.NewComment(CellAddress.A1); CT_Shape vmlShape = new CT_Shape(); @@ -167,17 +167,17 @@ public void Author() CommentsTable sheetComments = new CommentsTable(); CT_Comment ctComment = sheetComments.NewComment(CellAddress.A1); - ClassicAssert.AreEqual(1, sheetComments.GetNumberOfAuthors()); + ClassicAssert.AreEqual(1, sheetComments.NumberOfAuthors); XSSFComment comment = new XSSFComment(sheetComments, ctComment, null); ClassicAssert.AreEqual("", comment.Author); comment.Author = ("Apache POI"); ClassicAssert.AreEqual("Apache POI", comment.Author); - ClassicAssert.AreEqual(2, sheetComments.GetNumberOfAuthors()); + ClassicAssert.AreEqual(2, sheetComments.NumberOfAuthors); comment.Author = ("Apache POI"); - ClassicAssert.AreEqual(2, sheetComments.GetNumberOfAuthors()); + ClassicAssert.AreEqual(2, sheetComments.NumberOfAuthors); comment.Author = (""); ClassicAssert.AreEqual("", comment.Author); - ClassicAssert.AreEqual(2, sheetComments.GetNumberOfAuthors()); + ClassicAssert.AreEqual(2, sheetComments.NumberOfAuthors); } [Test] diff --git a/testcases/ooxml/XSSF/UserModel/TestXSSFWorkbook.cs b/testcases/ooxml/XSSF/UserModel/TestXSSFWorkbook.cs index 054f0ab77..cac07ef33 100644 --- a/testcases/ooxml/XSSF/UserModel/TestXSSFWorkbook.cs +++ b/testcases/ooxml/XSSF/UserModel/TestXSSFWorkbook.cs @@ -279,7 +279,7 @@ 
public void Styles() // Has 8 number formats ClassicAssert.AreEqual(8, st.NumDataFormats); // Has 2 fonts - ClassicAssert.AreEqual(2, st.GetFonts().Count); + ClassicAssert.AreEqual(2, st.Fonts.Count); // Has 2 Fills ClassicAssert.AreEqual(2, st.GetFills().Count); // Has 1 border @@ -303,7 +303,7 @@ public void Styles() ClassicAssert.IsNotNull(ss); ClassicAssert.AreEqual(10, st.NumDataFormats); - ClassicAssert.AreEqual(2, st.GetFonts().Count); + ClassicAssert.AreEqual(2, st.Fonts.Count); ClassicAssert.AreEqual(2, st.GetFills().Count); ClassicAssert.AreEqual(1, st.GetBorders().Count); diff --git a/testcases/test-data/spreadsheet/60825.xlsx b/testcases/test-data/spreadsheet/60825.xlsx new file mode 100644 index 000000000..ffcfe08e6 Binary files /dev/null and b/testcases/test-data/spreadsheet/60825.xlsx differ diff --git a/testcases/test-data/spreadsheet/61034.xlsx b/testcases/test-data/spreadsheet/61034.xlsx new file mode 100644 index 000000000..cd2c5e564 Binary files /dev/null and b/testcases/test-data/spreadsheet/61034.xlsx differ