diff --git a/OpenXmlFormats/Spreadsheet/Sheet.cs b/OpenXmlFormats/Spreadsheet/Sheet.cs index c53947267..3ea11b4c9 100644 --- a/OpenXmlFormats/Spreadsheet/Sheet.cs +++ b/OpenXmlFormats/Spreadsheet/Sheet.cs @@ -10726,6 +10726,19 @@ public void SetHyperlinkArray(CT_Hyperlink[] array) { hyperlinkField = new List(array); } + + public int SizeOfHyperlinkArray() + { + return this.hyperlinkField == null ? 0 : this.hyperlinkField.Count; + } + + public void RemoveHyperlink(int index) + { + if (this.hyperlink == null) + return; + this.hyperlinkField.RemoveAt(index); + } + [XmlElement("hyperlink", IsNullable = false)] public List hyperlink { diff --git a/OpenXmlFormats/Spreadsheet/Sheet/CT_Worksheet.cs b/OpenXmlFormats/Spreadsheet/Sheet/CT_Worksheet.cs index 12d4819aa..007175d33 100644 --- a/OpenXmlFormats/Spreadsheet/Sheet/CT_Worksheet.cs +++ b/OpenXmlFormats/Spreadsheet/Sheet/CT_Worksheet.cs @@ -425,6 +425,12 @@ public bool IsSetDimension() { return this.dimensionField != null; } + + public void UnsetHyperlinks() + { + this.hyperlinksField = null; + } + public CT_SheetProtection AddNewSheetProtection() { this.sheetProtectionField = new CT_SheetProtection(); @@ -1025,6 +1031,7 @@ public CT_IgnoredErrors AddNewIgnoredErrors() this.ignoredErrorsField = new CT_IgnoredErrors(); return this.ignoredErrorsField; } + } } diff --git a/main/SS/Formula/FormulaParser.cs b/main/SS/Formula/FormulaParser.cs index 2467d4969..0903c45f3 100644 --- a/main/SS/Formula/FormulaParser.cs +++ b/main/SS/Formula/FormulaParser.cs @@ -31,6 +31,7 @@ namespace NPOI.SS.Formula using NPOI.SS.UserModel; using NPOI.SS.Util; using NPOI.Util; + using SixLabors.Fonts.Unicode; /// /// Specific exception thrown when a supplied formula does not Parse properly. @@ -63,6 +64,7 @@ public class FormulaParser { private String _formulaString; private int _formulaLength; + /** points at the next character to be read (after the {@link #look} codepoint) */ private int _pointer; private ParseNode _rootNode; @@ -72,10 +74,10 @@ public class FormulaParser private const char LF = '\n'; // Normally just XSSF /** - * Lookahead Character. + * Lookahead unicode codepoint. * Gets value '\0' when the input string is exhausted */ - private char look; + private int look; /** * Tracks whether the run of whitespace preceeding "look" could be an @@ -183,7 +185,7 @@ private void GetChar() } if (_pointer < _formulaLength) { - look = _formulaString[_pointer]; + look = _formulaString.CodePointAt(_pointer); } else { @@ -192,7 +194,7 @@ private void GetChar() look = (char)0; _inIntersection = false; } - _pointer++; + _pointer+= StringUtil.CharCount(look); //Console.WriteLine("Got char: "+ look); } @@ -208,7 +210,7 @@ private Exception expected(String s) } else { - msg = "Parse error near char " + (_pointer - 1) + " '" + look + "'" + msg = "Parse error near char " + (_pointer - 1) + " '" + char.ConvertFromUtf32(look) + "'" + " in specified formula '" + _formulaString + "'. Expected " + s; } @@ -216,29 +218,39 @@ private Exception expected(String s) } /** Recognize an Alpha Character */ - private static bool IsAlpha(char c) + private static bool IsAlpha(int c) { - return Char.IsLetter(c) || c == '$' || c == '_'; + return CodePoint.IsLetter(new CodePoint(c)) || c == '$' || c == '_'; + } + + private static bool IsLetter(int c) + { + return CodePoint.IsLetter(new CodePoint(c)); } /** Recognize a Decimal Digit */ - private static bool IsDigit(char c) + private static bool IsDigit(int c) { - return Char.IsDigit(c); + return CodePoint.IsDigit(new CodePoint(c)); } /** Recognize an Alphanumeric */ - private static bool IsAlNum(char c) + private static bool IsAlNum(int c) { return IsAlpha(c) || IsDigit(c); } /** Recognize White Space */ - private static bool IsWhite(char c) + private static bool IsWhite(int c) { return c == ' ' || c == TAB || c == CR || c == LF; } + private static bool IsLetterOrDigit(int c) + { + return CodePoint.IsLetterOrDigit(new CodePoint(c)); + } + /** Skip Over Leading White Space */ private void SkipWhite() { @@ -253,7 +265,7 @@ private void SkipWhite() * unchecked exception. This method does not consume whitespace (before or after the * matched character). */ - private void Match(char x) + private void Match(int x) { if (look != x) { @@ -268,9 +280,9 @@ private String ParseUnquotedIdentifier() throw expected("unquoted identifier"); } StringBuilder sb = new StringBuilder(); - while (Char.IsLetterOrDigit(look) || look == '.') + while (IsLetterOrDigit(look) || look == '.') { - sb.Append(look); + sb.Append(char.ConvertFromUtf32(look)); GetChar(); } if (sb.Length < 1) @@ -285,9 +297,9 @@ private String GetNum() { StringBuilder value = new StringBuilder(); - while (IsDigit(this.look)) + while (IsDigit(look)) { - value.Append(this.look); + value.Append(char.ConvertFromUtf32(look)); GetChar(); } return value.Length == 0 ? null : value.ToString(); @@ -383,9 +395,15 @@ private static bool NeedsMemFunc(ParseNode root) * * @return true if the specified character may be used in a defined name */ - private static bool IsValidDefinedNameChar(char ch) + private static bool IsValidDefinedNameChar(int ch) { - if (Char.IsLetterOrDigit(ch)) + if (IsLetterOrDigit(ch)) + { + return true; + } + // the sheet naming rules are vague on whether unicode characters are allowed + // assume they're allowed. + if (ch > 128) { return true; } @@ -397,6 +415,7 @@ private static bool IsValidDefinedNameChar(char ch) case '\\': // of all things return true; } + // includes special non-name control characters like ! $ : , ( ) [ ] and space return false; } /** @@ -982,7 +1001,7 @@ private String ParseAsColumnQuantifier() StringBuilder name = new StringBuilder(); while (look != ']') { - name.Append(look); + name.Append(char.ConvertFromUtf32(look)); GetChar(); } Match(']'); @@ -1030,7 +1049,7 @@ private ParseNode ParseNonRange(int savePointer) { ResetPointer(savePointer); - if (Char.IsDigit(look)) + if (IsDigit(look)) { return new ParseNode(ParseNumber()); } @@ -1083,13 +1102,13 @@ private String ParseAsName() StringBuilder sb = new StringBuilder(); // defined names may begin with a letter or underscore or backslash - if (!char.IsLetter(look) && look != '_' && look != '\\') + if (!IsLetter(look) && look != '_' && look != '\\') { throw expected("number, string, defined name, or data table"); } while (IsValidDefinedNameChar(look)) { - sb.Append(look); + sb.Append(char.ConvertFromUtf32(look)); GetChar(); } SkipWhite(); @@ -1400,7 +1419,7 @@ private String GetBookName() GetChar(); while (look != ']') { - sb.Append(look); + sb.Append(char.ConvertFromUtf32(look)); GetChar(); } GetChar(); @@ -1435,7 +1454,7 @@ private SheetIdentifier ParseSheetName() bool done = look == '\''; while (!done) { - sb.Append(look); + sb.Append(char.ConvertFromUtf32(look)); GetChar(); if (look == '\'') { @@ -1461,13 +1480,13 @@ private SheetIdentifier ParseSheetName() } // unquoted sheet names must start with underscore or a letter - if (look == '_' || Char.IsLetter(look)) + if (look == '_' || IsLetter(look)) { StringBuilder sb = new StringBuilder(); // can concatenate idens with dots while (IsUnquotedSheetNameChar(look)) { - sb.Append(look); + sb.Append(char.ConvertFromUtf32(look)); GetChar(); } NameIdentifier iden = new NameIdentifier(sb.ToString(), false); @@ -1511,9 +1530,15 @@ private SheetIdentifier ParseSheetRange(String bookname, NameIdentifier sheet1Na /** * very similar to {@link SheetNameFormatter#isSpecialChar(char)} */ - private bool IsUnquotedSheetNameChar(char ch) + private bool IsUnquotedSheetNameChar(int ch) { - if (Char.IsLetterOrDigit(ch)) + if (IsLetterOrDigit(ch)) + { + return true; + } + // the sheet naming rules are vague on whether unicode characters are allowed + // assume they're allowed. + if (ch > 128) { return true; } @@ -1530,7 +1555,7 @@ private void ResetPointer(int ptr) _pointer = ptr; if (_pointer <= _formulaLength) { - look = _formulaString[_pointer - 1]; + look = _formulaString.CodePointAt(_pointer - StringUtil.CharCount(look)); } else { @@ -1761,7 +1786,7 @@ private void ValidateNumArgs(int numArgs, FunctionMetadata fm) } } - private static bool IsArgumentDelimiter(char ch) + private static bool IsArgumentDelimiter(int ch) { return ch == ',' || ch == ')'; } @@ -1875,7 +1900,7 @@ private ParseNode ParseSimpleFactor() } // named ranges and tables can start with underscore or backslash // see https://support.office.com/en-us/article/Define-and-use-names-in-formulas-4d0f13ac-53b7-422e-afd2-abd7ff379c64?ui=en-US&rs=en-US&ad=US#bmsyntax_rules_for_names - if (IsAlpha(look) || Char.IsDigit(look) || look == '\'' || look == '[' || look == '_' || look == '\\') + if (IsAlpha(look) || IsDigit(look) || look == '\'' || look == '[' || look == '_' || look == '\\') { return ParseRangeExpression(); } @@ -2237,7 +2262,7 @@ private String ParseStringLiteral() break; } } - Token.Append(look); + Token.Append(char.ConvertFromUtf32(look)); GetChar(); } return Token.ToString(); diff --git a/main/Util/StringUtil.cs b/main/Util/StringUtil.cs index bf882fed9..a914d66a7 100644 --- a/main/Util/StringUtil.cs +++ b/main/Util/StringUtil.cs @@ -42,7 +42,7 @@ namespace NPOI.Util /// @since May 10, 2002 /// @version 1.0 /// - public class StringUtil + public static class StringUtil { private static Encoding ISO_8859_1 = Encoding.GetEncoding("ISO-8859-1"); private static Encoding UTF16LE = Encoding.Unicode; @@ -50,9 +50,9 @@ public class StringUtil /** * Constructor for the StringUtil object */ - private StringUtil() - { - } + //private StringUtil() + //{ + //} /// /// Given a byte array of 16-bit unicode characters in Little Endian @@ -877,5 +877,21 @@ public static int CountMatches(string haystack, char needle) } return count; } + + public static int CodePointAt(this string text, int index) + { + if (!char.IsSurrogate(text[index])) + { + return (int)text[index]; + } + if (index + 1 < text.Length && char.IsSurrogatePair(text[index], text[index + 1])) + { + return char.ConvertToUtf32(text[index], text[index+1]); + } + else + { + throw new Exception("String was not well-formed UTF-16."); + } + } } } diff --git a/ooxml/XSSF/UserModel/XSSFSheet.cs b/ooxml/XSSF/UserModel/XSSFSheet.cs index 8d6f11c20..46508c806 100644 --- a/ooxml/XSSF/UserModel/XSSFSheet.cs +++ b/ooxml/XSSF/UserModel/XSSFSheet.cs @@ -1502,7 +1502,26 @@ CT_Hyperlink[] ctHls worksheet.hyperlinks.SetHyperlinkArray(ctHls); } - + else + { + if (worksheet.hyperlinks != null) + { + int count = worksheet.hyperlinks.SizeOfHyperlinkArray(); + for (int i = count - 1; i >= 0; i--) + { + worksheet.hyperlinks.RemoveHyperlink(i); + } + // For some reason, we have to remove the hyperlinks one by one from the CTHyperlinks array + // before unsetting the hyperlink array. + // Resetting the hyperlink array seems to break some XML nodes. + //worksheet.getHyperlinks().setHyperlinkArray(new CTHyperlink[0]); + worksheet.UnsetHyperlinks(); + } + else + { + // nothing to do + } + } foreach (XSSFRow row in _rows.Values) { row.OnDocumentWrite(); diff --git a/testcases/main/SS/UserModel/BaseTestNamedRange.cs b/testcases/main/SS/UserModel/BaseTestNamedRange.cs index 761eefc10..98452acba 100644 --- a/testcases/main/SS/UserModel/BaseTestNamedRange.cs +++ b/testcases/main/SS/UserModel/BaseTestNamedRange.cs @@ -741,6 +741,20 @@ public void TestInvalid() wb.Close(); } + // bug 60260: renaming a sheet with a named range referring to a unicode (non-ASCII) sheet name + [Test] + public void RenameSheetWithNamedRangeReferringToUnicodeSheetName() + { + IWorkbook wb = _testDataProvider.CreateWorkbook(); + wb.CreateSheet("Sheet\u30FB1"); + + IName name = wb.CreateName(); + name.NameName = ("test_named_range"); + name.RefersToFormula = ("'Sheet\u30FB201'!A1:A6"); + + wb.SetSheetName(0, "Sheet 1"); + IOUtils.CloseQuietly(wb); + } } } \ No newline at end of file diff --git a/testcases/main/SS/UserModel/BaseTestSheet.cs b/testcases/main/SS/UserModel/BaseTestSheet.cs index 2076d8f82..c0809fa07 100644 --- a/testcases/main/SS/UserModel/BaseTestSheet.cs +++ b/testcases/main/SS/UserModel/BaseTestSheet.cs @@ -1326,6 +1326,30 @@ public void GetHyperlink() workbook.Close(); } + [Test] + public void RemoveAllHyperlinks() + { + IWorkbook workbook = _testDataProvider.CreateWorkbook(); + IHyperlink hyperlink = workbook.GetCreationHelper().CreateHyperlink(HyperlinkType.Url); + hyperlink.Address = "https://poi.apache.org/"; + ISheet sheet = workbook.CreateSheet(); + ICell cell = sheet.CreateRow(5).CreateCell(1); + cell.Hyperlink = hyperlink; + + Assert.AreEqual(1, workbook.GetSheetAt(0).GetHyperlinkList().Count); + // Save a workbook with a hyperlink + IWorkbook workbook2 = _testDataProvider.WriteOutAndReadBack(workbook); + Assert.AreEqual(1, workbook2.GetSheetAt(0).GetHyperlinkList().Count); + + // Remove all hyperlinks from a saved workbook + workbook2.GetSheetAt(0).GetRow(5).GetCell(1).RemoveHyperlink(); + Assert.AreEqual(0, workbook2.GetSheetAt(0).GetHyperlinkList().Count); + + // Verify that hyperlink was removed from workbook after writing out + IWorkbook workbook3 = _testDataProvider.WriteOutAndReadBack(workbook2); + Assert.AreEqual(0, workbook3.GetSheetAt(0).GetHyperlinkList().Count); + } + [Test] public void NewMergedRegionAt() { diff --git a/testcases/ooxml/XSSF/SXSSFITestDataProvider.cs b/testcases/ooxml/XSSF/SXSSFITestDataProvider.cs index dbc5876ed..b4035a8d7 100644 --- a/testcases/ooxml/XSSF/SXSSFITestDataProvider.cs +++ b/testcases/ooxml/XSSF/SXSSFITestDataProvider.cs @@ -57,9 +57,12 @@ public IWorkbook OpenSampleWorkbook(String sampleFileName) public IWorkbook WriteOutAndReadBack(IWorkbook wb) { - if (!(wb is SXSSFWorkbook)) + // wb is usually an SXSSFWorkbook, but must also work on an XSSFWorkbook + // since workbooks must be able to be written out and read back + // several times in succession + if (!(wb is SXSSFWorkbook || wb is XSSFWorkbook)) { - throw new ArgumentException("Expected an instance of SXSSFWorkbook"); + throw new ArgumentException("Expected an instance of XSSFWorkbook or SXSSFWorkbook"); } XSSFWorkbook result; @@ -133,4 +136,4 @@ public bool Cleanup() } } -} \ No newline at end of file +} diff --git a/testcases/ooxml/XSSF/UserModel/TestXSSFSheet.cs b/testcases/ooxml/XSSF/UserModel/TestXSSFSheet.cs index 77225b02b..9082d2e95 100644 --- a/testcases/ooxml/XSSF/UserModel/TestXSSFSheet.cs +++ b/testcases/ooxml/XSSF/UserModel/TestXSSFSheet.cs @@ -1455,6 +1455,80 @@ public void TestSetColumnGroupCollapsed() wb2.Close(); } + /** + * Verify that column groups were created correctly after Sheet.groupColumn + * + * @param col the column group xml bean + * @param fromColumnIndex 0-indexed + * @param toColumnIndex 0-indexed + */ + private static void checkColumnGroup( + CT_Col col, + int fromColumnIndex, int toColumnIndex, + bool isSetHidden, bool isSetCollapsed + ) + { + Assert.AreEqual(fromColumnIndex, col.min - 1, "from column index"); // 1 based + Assert.AreEqual(toColumnIndex, col.max - 1, "to column index"); // 1 based + Assert.AreEqual(isSetHidden, col.IsSetHidden(), "isSetHidden"); + Assert.AreEqual(isSetCollapsed, col.IsSetCollapsed(), "isSetCollapsed"); //not necessarily set + } + + /** + * Verify that column groups were created correctly after Sheet.groupColumn + * + * @param col the column group xml bean + * @param fromColumnIndex 0-indexed + * @param toColumnIndex 0-indexed + */ + private static void checkColumnGroup( + CT_Col col, + int fromColumnIndex, int toColumnIndex + ) + { + Assert.AreEqual(fromColumnIndex, col.min - 1, "from column index"); // 1 based + Assert.AreEqual(toColumnIndex, col.max - 1, "to column index"); // 1 based + Assert.IsFalse(col.IsSetHidden(), "isSetHidden"); + Assert.IsTrue(col.IsSetCollapsed(), "isSetCollapsed"); //not necessarily set + } + /** + * Verify that column groups were created correctly after Sheet.groupColumn + * + * @param col the column group xml bean + * @param fromColumnIndex 0-indexed + * @param toColumnIndex 0-indexed + */ + private static void checkColumnGroupIsCollapsed( + CT_Col col, + int fromColumnIndex, int toColumnIndex + ) + { + Assert.AreEqual(fromColumnIndex, col.min - 1, "from column index"); // 1 based + Assert.AreEqual(toColumnIndex, col.max - 1, "to column index"); // 1 based + Assert.IsTrue(col.IsSetHidden(), "isSetHidden"); + Assert.IsTrue(col.IsSetCollapsed(), "isSetCollapsed"); + //assertTrue("getCollapsed", col.getCollapsed()); + } + /** + * Verify that column groups were created correctly after Sheet.groupColumn + * + * @param col the column group xml bean + * @param fromColumnIndex 0-indexed + * @param toColumnIndex 0-indexed + */ + private static void checkColumnGroupIsExpanded( + CT_Col col, + int fromColumnIndex, int toColumnIndex + ) + { + Assert.AreEqual(fromColumnIndex, col.min - 1, "from column index"); // 1 based + Assert.AreEqual(toColumnIndex, col.max - 1, "to column index"); // 1 based + Assert.IsFalse(col.IsSetHidden(), "isSetHidden"); + Assert.IsTrue(col.IsSetCollapsed(), "isSetCollapsed"); + //assertTrue("isSetCollapsed", !col.isSetCollapsed() || !col.getCollapsed()); + //assertFalse("getCollapsed", col.getCollapsed()); + } + /** * TODO - while this is internally consistent, I'm not * completely clear in all cases what it's supposed to @@ -2211,8 +2285,7 @@ private XSSFWorkbook SetupSheet() ICell cell6 = row3.CreateCell(1); cell6.SetCellValue(3); - return wb; - } + return wb; } [Test] public void TestCreateTwoPivotTablesInOneSheet() diff --git a/testcases/ooxml/XSSF/UserModel/TestXSSFSheetShiftRows.cs b/testcases/ooxml/XSSF/UserModel/TestXSSFSheetShiftRows.cs index 212e0f3f7..4276ce34c 100644 --- a/testcases/ooxml/XSSF/UserModel/TestXSSFSheetShiftRows.cs +++ b/testcases/ooxml/XSSF/UserModel/TestXSSFSheetShiftRows.cs @@ -18,6 +18,7 @@ limitations under the License. using NPOI.OpenXmlFormats.Spreadsheet; using NPOI.SS.UserModel; using NPOI.SS.Util; +using NPOI.Util; using NPOI.XSSF; using NPOI.XSSF.UserModel; using NUnit.Framework; @@ -462,6 +463,15 @@ public void TestSharedFormulas() wb.Close(); } + // bug 60260: shift rows or rename a sheet containing a named range + // that refers to formula with a unicode (non-ASCII) sheet name formula + [Test] + public void ShiftRowsWithUnicodeNamedRange() + { + XSSFWorkbook wb = XSSFTestDataSamples.OpenSampleWorkbook("unicodeSheetName.xlsx"); + XSSFSheet sheet = wb.GetSheetAt(0) as XSSFSheet; + sheet.ShiftRows(1, 2, 3); + IOUtils.CloseQuietly(wb); + } } } - diff --git a/testcases/test-data/spreadsheet/unicodeSheetName.xlsx b/testcases/test-data/spreadsheet/unicodeSheetName.xlsx new file mode 100644 index 000000000..8c0fa8c4d Binary files /dev/null and b/testcases/test-data/spreadsheet/unicodeSheetName.xlsx differ