diff --git a/Directory.Packages.props b/Directory.Packages.props index fcb4b3b48..7b9702770 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -14,7 +14,6 @@ - diff --git a/ooxml/NPOI.OOXML.Core.csproj b/ooxml/NPOI.OOXML.Core.csproj index fb7a63b6a..d8dbe05fe 100644 --- a/ooxml/NPOI.OOXML.Core.csproj +++ b/ooxml/NPOI.OOXML.Core.csproj @@ -25,7 +25,6 @@ - diff --git a/ooxml/XSSF/EventUserModel/ReadOnlySharedStringsTable.cs b/ooxml/XSSF/EventUserModel/ReadOnlySharedStringsTable.cs index a21a2f8b7..9cd53fa12 100644 --- a/ooxml/XSSF/EventUserModel/ReadOnlySharedStringsTable.cs +++ b/ooxml/XSSF/EventUserModel/ReadOnlySharedStringsTable.cs @@ -22,8 +22,7 @@ namespace NPOI.XSSF.EventUserModel using System.Collections.Generic; using System.IO; using System.Text; - using NSAX; - using NSAX.Helpers; + using System.Xml; /// /// @@ -72,7 +71,7 @@ namespace NPOI.XSSF.EventUserModel /// /// /// - public class ReadOnlySharedStringsTable : DefaultHandler + public class ReadOnlySharedStringsTable { private bool includePhoneticRuns; @@ -106,7 +105,6 @@ public class ReadOnlySharedStringsTable : DefaultHandler /// /// The to use as basis for the shared-strings table. /// If reading the data from the package fails. - /// if parsing the XML data fails. public ReadOnlySharedStringsTable(OPCPackage pkg) : this(pkg, true) { @@ -117,8 +115,6 @@ public ReadOnlySharedStringsTable(OPCPackage pkg) /// The to use as basis for the shared-strings table. /// whether or not to concatenate phoneticRuns onto the shared string /// IOException If reading the data from the package fails. - /// SAXException if parsing the XML data fails. - /// @since POI 3.14-Beta3 public ReadOnlySharedStringsTable(OPCPackage pkg, bool includePhoneticRuns) { this.includePhoneticRuns = includePhoneticRuns; @@ -153,9 +149,7 @@ public ReadOnlySharedStringsTable(PackagePart part) /// /// @since POI 3.14-Beta3 public ReadOnlySharedStringsTable(PackagePart part, bool includePhoneticRuns) - { - this.includePhoneticRuns = includePhoneticRuns; ReadFrom(part.GetInputStream()); } @@ -165,27 +159,32 @@ public ReadOnlySharedStringsTable(PackagePart part, bool includePhoneticRuns) /// /// The input stream containing the XML document. /// if an error occurs while reading. - /// if parsing the XML data fails. public void ReadFrom(Stream is1) { // test if the file is empty, otherwise parse it - //PushbackInputStream pis = new PushbackInputStream(is1, 1); - //int emptyTest = pis.Read(); - //if (emptyTest > -1) if(is1.Length > 0) { - //pis.Unread(emptyTest); - InputSource sheetSource = new InputSource(is1); - //try + XmlReaderSettings settings = new XmlReaderSettings(); + settings.DtdProcessing = DtdProcessing.Ignore; + var reader = XmlReader.Create(is1, settings); + while(reader.Read()) { - NSAX.AElfred.SAXDriver sheetParser = new NSAX.AElfred.SAXDriver(); - sheetParser.ContentHandler = (this); - sheetParser.Parse(sheetSource); + if(reader.NodeType == XmlNodeType.Element) + { + //begin element + StartElement(reader); + } + else if(reader.NodeType == XmlNodeType.EndElement) + { + EndElement(reader); + } + else if(reader.NodeType == XmlNodeType.Text || + reader.NodeType == XmlNodeType.SignificantWhitespace || + reader.NodeType == XmlNodeType.Whitespace) + { + TextNode(reader); + } } - //catch (ParserConfigurationException e) - //{ - // throw new RuntimeException("SAX parser appears to be broken - " + e.GetMessage()); - //} } } @@ -224,85 +223,85 @@ public String GetEntryAt(int idx) private bool tIsOpen; private bool inRPh; - public override void StartElement(String uri, String localName, String name, - IAttributes attributes) + public void TextNode(XmlReader reader) { + if(tIsOpen) + { + if(inRPh && includePhoneticRuns) + { + characters.Append(reader.Value); + } + else if(!inRPh) + { + characters.Append(reader.Value); + } + } + } - if (uri != null && !uri.Equals(XSSFRelation.NS_SPREADSHEETML)) + public void StartElement(XmlReader reader) + { + string uri = reader.NamespaceURI; + string localName = reader.LocalName; + //string name = reader.Name; + if(uri != null && !uri.Equals(XSSFRelation.NS_SPREADSHEETML)) { return; } - - if ("sst".Equals(localName)) + if("sst".Equals(localName)) { - String count = attributes.GetValue("count"); - if (count != null) this.count = Int32.Parse(count); - String uniqueCount = attributes.GetValue("uniqueCount"); - if (uniqueCount != null) this.uniqueCount = Int32.Parse(uniqueCount); + String count = reader.GetAttribute("count"); + if(count != null) + this.count = Int32.Parse(count); + String uniqueCount = reader.GetAttribute("uniqueCount"); + if(uniqueCount != null) + this.uniqueCount = Int32.Parse(uniqueCount); this.strings = new List(this.uniqueCount); this.phoneticStrings = new Dictionary(); characters = new StringBuilder(); } - else if ("si".Equals(localName)) + else if("si".Equals(localName)) { characters.Length = 0; } - else if ("t".Equals(localName)) + else if("t".Equals(localName)) { tIsOpen = true; } - else if ("rPh".Equals(localName)) + else if("rPh".Equals(localName)) { inRPh = true; //append space...this assumes that rPh always comes After regular - if (includePhoneticRuns && characters.Length > 0) + if(includePhoneticRuns && characters.Length > 0) { characters.Append(" "); } } } - public override void EndElement(String uri, String localName, String name) - + public void EndElement(XmlReader reader) { - - if (uri != null && !uri.Equals(XSSFRelation.NS_SPREADSHEETML)) + string uri = reader.NamespaceURI; + string localName = reader.LocalName; + //string name = reader.Name; + if(uri != null && !uri.Equals(XSSFRelation.NS_SPREADSHEETML)) { return; } - if ("si".Equals(localName)) + if("si".Equals(localName)) { strings.Add(characters.ToString()); } - else if ("t".Equals(localName)) + else if("t".Equals(localName)) { tIsOpen = false; } - else if ("rPh".Equals(localName)) + else if("rPh".Equals(localName)) { inRPh = false; } } - - /// - /// Captures characters only if a t(ext) element is open. - /// - public override void Characters(char[] ch, int start, int length) - { - if (tIsOpen) - { - if (inRPh && includePhoneticRuns) - { - characters.Append(ch, start, length); - } - else if (!inRPh) - { - characters.Append(ch, start, length); - } - } - } } } diff --git a/ooxml/XSSF/EventUserModel/XSSFReader.cs b/ooxml/XSSF/EventUserModel/XSSFReader.cs index f7a87b2cf..dcf3c40cc 100644 --- a/ooxml/XSSF/EventUserModel/XSSFReader.cs +++ b/ooxml/XSSF/EventUserModel/XSSFReader.cs @@ -19,19 +19,15 @@ limitations under the License. using System.Collections; using System.Collections.Generic; using System.IO; -using System.Text; namespace NPOI.XSSF.EventUserModel { - using NPOI; using NPOI.OpenXml4Net.Exceptions; using NPOI.OpenXml4Net.OPC; using NPOI.Util; using NPOI.XSSF.Model; using NPOI.XSSF.UserModel; - using NSAX; - using NSAX.Helpers; using System.Xml; /// @@ -247,29 +243,27 @@ internal SheetIterator(PackagePart wb) public virtual List CreateSheetIteratorFromWB(PackagePart wb) { XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader(); - NSAX.AElfred.SAXDriver xmlReader; try { - xmlReader = new NSAX.AElfred.SAXDriver();// SAXHelper.newXMLReader(); + XmlReaderSettings settings = new XmlReaderSettings(); + settings.DtdProcessing = DtdProcessing.Ignore; + var reader = XmlReader.Create(wb.GetInputStream(), settings); + while(reader.Read()) + { + if(reader.NodeType == XmlNodeType.Element) + { + xmlSheetRefReader.StartElement(reader); + } + } } - //catch (ParserConfigurationException e) - //{ - // throw new POIXMLException(e); - //} - catch (SAXException e) + catch(XmlException e) { throw new POIXMLException(e); } - xmlReader.ContentHandler = (xmlSheetRefReader); - try - { - xmlReader.Parse(new InputSource(wb.GetInputStream())); - } - catch (SAXException e) + catch(InvalidOperationException e) { throw new POIXMLException(e); } - List validSheets = new List(); foreach (XSSFSheetRef xssfSheetRef in xmlSheetRefReader.GetSheetRefs()) { @@ -456,39 +450,28 @@ public XSSFSheetRef(String id, String name) } //scrapes sheet reference info and order from workbook.xml - private class XMLSheetRefReader : DefaultHandler + private class XMLSheetRefReader { private static String SHEET = "sheet"; private static String ID = "id"; private static String NAME = "name"; - + private List sheetRefs = new List(); // read // and add XSSFSheetRef(id="rId6", name="Sheet6") to sheetRefs - public override void StartElement(String uri, String localName, String qName, IAttributes attrs) + public void StartElement(XmlReader reader) { - - if (localName.Equals(SHEET, StringComparison.OrdinalIgnoreCase)) + string uri = reader.NamespaceURI; + string localName = reader.LocalName; + if(localName.Equals(SHEET, StringComparison.OrdinalIgnoreCase)) { - String name = null; - String id = null; - for (int i = 0; i < attrs.Length; i++) + String name = reader.GetAttribute(NAME); + String id = reader.GetAttribute(ID, PackageNamespaces.SCHEMA_RELATIONSHIPS); + + if(name != null && id != null) { - String attrName = attrs.GetLocalName(i); - if (attrName.Equals(NAME, StringComparison.OrdinalIgnoreCase)) - { - name = attrs.GetValue(i); - } - else if (attrName.Equals(ID, StringComparison.OrdinalIgnoreCase)) - { - id = attrs.GetValue(i); - } - if (name != null && id != null) - { - sheetRefs.Add(new XSSFSheetRef(id, name)); - break; - } + sheetRefs.Add(new XSSFSheetRef(id, name)); } } } diff --git a/ooxml/XSSF/EventUserModel/XSSFSheetXMLHandler.cs b/ooxml/XSSF/EventUserModel/XSSFSheetXMLHandler.cs index e584b07f1..a8d27306d 100644 --- a/ooxml/XSSF/EventUserModel/XSSFSheetXMLHandler.cs +++ b/ooxml/XSSF/EventUserModel/XSSFSheetXMLHandler.cs @@ -23,24 +23,19 @@ limitations under the License. namespace NPOI.XSSF.EventUserModel { - using static NPOI.XSSF.UserModel.XSSFRelation; - - + using NPOI.OpenXmlFormats.Spreadsheet; using NPOI.SS.UserModel; using NPOI.SS.Util; - using NPOI.Util; using NPOI.XSSF.Model; using NPOI.XSSF.UserModel; - using NSAX.Helpers; - using NPOI.OpenXmlFormats.Spreadsheet; - using NSAX; + using System.Xml; /// /// This class handles the processing of a sheet#.xml /// sheet part of a XSSF .xlsx file, and generates /// row and cell events for it. /// - public class XSSFSheetXMLHandler : DefaultHandler + public class XSSFSheetXMLHandler { //private static POILogger logger = POILogFactory.GetLogger(XSSFSheetXMLHandler.class); @@ -196,12 +191,11 @@ private bool IsTextTag(String name) // It isn't a text tag return false; } - public override void StartElement(String uri, String localName, String qName, - IAttributes attributes) + internal void StartElement(XmlReader reader) { - - - if(uri != null && !uri.Equals(NS_SPREADSHEETML)) + string uri = reader.NamespaceURI; + string localName = reader.LocalName; + if(uri != null && !uri.Equals(XSSFRelation.NS_SPREADSHEETML)) { return; } @@ -229,12 +223,12 @@ public override void StartElement(String uri, String localName, String qName, } // Decide where to Get the formula string from - String type = attributes.GetValue("t"); + String type = reader.GetAttribute("t"); if(type != null && type.Equals("shared")) { // Is it the one that defines the shared, or uses it? - String ref1 = attributes.GetValue("ref"); - String si = attributes.GetValue("si"); + String ref1 = reader.GetAttribute("ref"); + String si = reader.GetAttribute("si"); if(ref1 != null) { @@ -272,7 +266,7 @@ public override void StartElement(String uri, String localName, String qName, } else if("row".Equals(localName)) { - String rowNumStr = attributes.GetValue("r"); + String rowNumStr = reader.GetAttribute("r"); if(rowNumStr != null) { rowNum = Int32.Parse(rowNumStr) - 1; @@ -290,9 +284,9 @@ public override void StartElement(String uri, String localName, String qName, this.nextDataType = XSSFDataType.Number; this.formatIndex = -1; this.formatString = null; - cellRef = attributes.GetValue("r"); - String cellType = attributes.GetValue("t"); - String cellStyleStr = attributes.GetValue("s"); + cellRef = reader.GetAttribute("r"); + String cellType = reader.GetAttribute("t"); + String cellStyleStr = reader.GetAttribute("s"); if("b".Equals(cellType)) nextDataType = XSSFDataType.Boolean; else if("e".Equals(cellType)) @@ -329,12 +323,12 @@ public override void StartElement(String uri, String localName, String qName, } } } - public override void EndElement(String uri, String localName, String qName) - + internal void EndElement(XmlReader reader) { + string uri = reader.NamespaceURI; + string localName = reader.LocalName; - - if(uri != null && !uri.Equals(NS_SPREADSHEETML)) + if(uri != null && !uri.Equals(XSSFRelation.NS_SPREADSHEETML)) { return; } @@ -469,28 +463,21 @@ public override void EndElement(String uri, String localName, String qName) } } - /// - /// Captures characters only if a suitable element is open. - /// Originally was just "v"; extended for inlineStr also. - /// - public override void Characters(char[] ch, int start, int length) - + internal void TextNode(XmlReader reader) { - if(vIsOpen) { - value.Append(ch, start, length); + value.Append(reader.Value); } if(fIsOpen) { - formula.Append(ch, start, length); + formula.Append(reader.Value); } if(hfIsOpen) { - headerFooter.Append(ch, start, length); + headerFooter.Append(reader.Value); } } - /// /// Do a check for, and output, comments in otherwise empty cells. /// diff --git a/ooxml/XSSF/Extractor/XSSFBEventBasedExcelExtractor.cs b/ooxml/XSSF/Extractor/XSSFBEventBasedExcelExtractor.cs index 47fc22113..47443176d 100644 --- a/ooxml/XSSF/Extractor/XSSFBEventBasedExcelExtractor.cs +++ b/ooxml/XSSF/Extractor/XSSFBEventBasedExcelExtractor.cs @@ -17,7 +17,6 @@ limitations under the License. using NPOI.OpenXml4Net; using NPOI.SS.Extractor; -using NSAX; using System; using System.Collections; using System.Collections.Generic; @@ -26,14 +25,12 @@ limitations under the License. namespace NPOI.XSSF.Extractor { - - using NPOI; using NPOI.OpenXml4Net.OPC; using NPOI.SS.UserModel; - using NPOI.Util; using NPOI.XSSF.Binary; using NPOI.XSSF.EventUserModel; using NPOI.XSSF.UserModel; + using System.Xml; /// /// Implementation of a text extractor or xlsb Excel @@ -177,11 +174,11 @@ public String GetText() //LOGGER.log(POILogger.WARN, e); return null; } - catch(SAXException) - { - //LOGGER.log(POILogger.WARN, se); - return null; - } + //catch(XmlException) + //{ + // //LOGGER.log(POILogger.WARN, se); + // return null; + //} catch(OpenXml4NetException) { //LOGGER.log(POILogger.WARN, o4je); diff --git a/ooxml/XSSF/Extractor/XSSFEventBasedExcelExtractor.cs b/ooxml/XSSF/Extractor/XSSFEventBasedExcelExtractor.cs index 270f05cfb..637e6ce22 100644 --- a/ooxml/XSSF/Extractor/XSSFEventBasedExcelExtractor.cs +++ b/ooxml/XSSF/Extractor/XSSFEventBasedExcelExtractor.cs @@ -34,10 +34,9 @@ namespace NPOI.XSSF.Extractor using NPOI.XSSF.Model; using NPOI.XSSF.UserModel; using System.Globalization; - using NSAX; - using NSAX.AElfred; using static NPOI.XSSF.EventUserModel.XSSFSheetXMLHandler; using NPOI.OpenXml4Net; + using System.Xml; /// /// Implementation of a text extractor from OOXML Excel @@ -203,10 +202,7 @@ public void ProcessSheet( CommentsTable comments, ReadOnlySharedStringsTable strings, Stream sheetInputStream) - { - - DataFormatter formatter; if(locale == null) { @@ -217,18 +213,39 @@ public void ProcessSheet( formatter = new DataFormatter(locale); } - InputSource sheetSource = new InputSource(sheetInputStream); try { - SAXDriver sheetParser = new SAXDriver(); - IContentHandler handler = new XSSFSheetXMLHandler( - styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults); - sheetParser.ContentHandler = (handler); - sheetParser.Parse(sheetSource); + var handler = new XSSFSheetXMLHandler( + styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults); + XmlReaderSettings settings = new XmlReaderSettings(); + settings.DtdProcessing = DtdProcessing.Ignore; + var reader = XmlReader.Create(sheetInputStream, settings); + while(reader.Read()) + { + if(reader.NodeType == XmlNodeType.Element) + { + handler.StartElement(reader); + if(reader.IsEmptyElement) //empty tag, perhaps some special handling is needed + { + handler.EndElement(reader); + } + } + else if(reader.NodeType == XmlNodeType.EndElement) + { + handler.EndElement(reader); + } + else if(reader.NodeType == XmlNodeType.Text || + reader.NodeType == XmlNodeType.SignificantWhitespace || + reader.NodeType == XmlNodeType.Whitespace) + { + var value = reader.Value; + handler.TextNode(reader); + } + } } - catch(SAXException e) + catch(XmlException e) { - throw new RuntimeException("SAX parser appears to be broken - " + e.Message); + throw new RuntimeException("XML parser appears to be broken - " + e.Message); } } @@ -283,11 +300,6 @@ public override String Text //LOGGER.log(POILogger.WARN, e); return null; } - catch(SAXException) - { - //LOGGER.log(POILogger.WARN, se); - return null; - } catch(OpenXml4NetException) { //LOGGER.log(POILogger.WARN, o4je); diff --git a/testcases/ooxml/XSSF/EventUserModel/TestXSSFReader.cs b/testcases/ooxml/XSSF/EventUserModel/TestXSSFReader.cs index 168f5b263..a1f4ec4f2 100644 --- a/testcases/ooxml/XSSF/EventUserModel/TestXSSFReader.cs +++ b/testcases/ooxml/XSSF/EventUserModel/TestXSSFReader.cs @@ -351,7 +351,6 @@ public void Test61034() pkg.Close(); } [Test] - [Ignore("until we fix issue https://bz.apache.org/bugzilla/show_bug.cgi?id=61701")] public void Test61701() { XSSFWorkbook workbook = XSSFTestDataSamples.OpenSampleWorkbook("simple-table-named-range.xlsx");