Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<entry xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="https://logging.apache.org/xml/ns"
xsi:schemaLocation="https://logging.apache.org/xml/ns https://logging.apache.org/xml/ns/log4j-changelog-0.xsd"
type="fixed">
<issue id="290" link="https://github.com/apache/logging-log4net/issues/290"/>
<issue id="291" link="https://github.com/apache/logging-log4net/pull/291"/>
<description format="asciidoc">
fix silent corruption of supplementary Unicode characters (U+10000–U+10FFFF) in `MaskXmlInvalidCharacters`:
valid UTF-16 surrogate pairs are now preserved instead of being replaced with the mask string
(reported by @N0tre3l, fixed by @freeandnil in https://github.com/apache/logging-log4net/pull/291[#291])
</description>
</entry>
145 changes: 143 additions & 2 deletions src/log4net.Tests/Util/TransformTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,168 @@
//
#endregion

using System;
using System.Reflection;
using log4net.Util;

using NUnit.Framework;

namespace log4net.Tests.Util;


[TestFixture]
public class TransformTest
{
/// <summary>
/// Verifies that a valid BMP character in the allowed XML 1.0 range (U+203B ※)
/// is passed through unchanged.
/// </summary>
[Test]
public void MaskXmlInvalidCharactersAllowsJapaneseCharacters()
{
const string kome = "\u203B";
Assert.That(Transform.MaskXmlInvalidCharacters(kome, "?"), Is.EqualTo(kome));
}

/// <summary>
/// Verifies that the NUL character (U+0000), which is illegal in XML 1.0,
/// is replaced with the supplied mask string.
/// </summary>
[Test]
public void MaskXmlInvalidCharactersMasks0Char()
{
const string c = "\0";
Assert.That(Transform.MaskXmlInvalidCharacters(c, "?"), Is.EqualTo("?"));
}

/// <summary>
/// Verifies that a well-formed UTF-16 surrogate pair representing a supplementary
/// character (U+1F511 🔑) is treated as a valid XML character and preserved intact.
/// </summary>
[Test]
public void MaskXmlInvalidCharactersPreservesValidSurrogatePair()
{
// U+1F511 KEY (🔑) — encoded as surrogate pair \uD83D\uDD11
const string key = "\uD83D\uDD11";
Assert.That(Transform.MaskXmlInvalidCharacters(key, "?"), Is.EqualTo(key));
}

/// <summary>
/// Verifies that a supplementary character embedded within plain ASCII text
/// is preserved without corrupting the surrounding characters.
/// </summary>
[Test]
public void MaskXmlInvalidCharactersPreservesSupplementaryCharacterInContext()
{
// "admin🔑" — only the emoji is a surrogate pair; all other chars are BMP
const string input = "admin\uD83D\uDD11";
Assert.That(Transform.MaskXmlInvalidCharacters(input, "?"), Is.EqualTo(input));
}

/// <summary>
/// Verifies that a lone high surrogate (U+D800–U+DBFF) not followed by a
/// low surrogate is malformed UTF-16 and is replaced with the mask string.
/// </summary>
[Test]
public void MaskXmlInvalidCharactersMasksLoneHighSurrogate()
{
// A lone high surrogate with no following low surrogate is illegal
const string loneSurrogate = "\uD83D";
Assert.That(Transform.MaskXmlInvalidCharacters(loneSurrogate, "?"), Is.EqualTo("?"));
}

/// <summary>
/// Verifies that a lone low surrogate (U+DC00–U+DFFF) not preceded by a
/// high surrogate is malformed UTF-16 and is replaced with the mask string.
/// </summary>
[Test]
public void MaskXmlInvalidCharactersMasksLoneLowSurrogate()
{
const string loneSurrogate = "\uDD11";
Assert.That(Transform.MaskXmlInvalidCharacters(loneSurrogate, "?"), Is.EqualTo("?"));
}

/// <summary>
/// Verifies current behaviour: a null input throws <see cref="ArgumentNullException"/>
/// (delegated to the regex engine). Add a null-guard to <see cref="Transform.MaskXmlInvalidCharacters"/>
/// if a graceful fallback is preferred.
/// </summary>
[Test]
public void MaskXmlInvalidCharactersNullInputThrowsArgumentNullException()
=> Assert.That(() => Transform.MaskXmlInvalidCharacters(null!, "?"),
Throws.ArgumentNullException);

#region CountSubstrings tests (private method accessed via reflection)

/// <summary>
/// Resolves the private static <see cref="CountSubstrings"/> method once for all tests
/// </summary>
private static Func<string, string, int> CountSubstrings { get; } = (Func<string, string, int>)Delegate.CreateDelegate(
typeof(Func<string, string, int>), typeof(Transform).GetMethod(nameof(CountSubstrings), BindingFlags.NonPublic | BindingFlags.Static)
?? throw new MissingMethodException(nameof(Transform), nameof(CountSubstrings)));

/// <summary>
/// Verifies that an empty text returns zero regardless of the substring.
/// </summary>
[Test]
public void CountSubstringsEmptyTextReturnsZero()
=> Assert.That(CountSubstrings("", "a"), Is.Zero);

/// <summary>
/// Verifies that an empty substring returns zero regardless of the text.
/// </summary>
[Test]
public void CountSubstringsEmptySubstringReturnsZero()
=> Assert.That(CountSubstrings("abc", ""), Is.Zero);

/// <summary>
/// Verifies that a substring not present in the text returns zero.
/// </summary>
[Test]
public void CountSubstringsNoMatchReturnsZero()
=> Assert.That(CountSubstrings("hello", "x"), Is.Zero);

/// <summary>
/// Verifies that a single-character substring occurring once is counted correctly.
/// </summary>
[Test]
public void CountSubstringsSingleCharSingleMatchReturnsOne()
=> Assert.That(CountSubstrings("hello", "h"), Is.EqualTo(1));

/// <summary>
/// Verifies that a single-character substring occurring multiple times is counted correctly.
/// </summary>
[Test]
public void CountSubstringsSingleCharMultipleMatchesReturnsCorrectCount()
=> Assert.That(CountSubstrings("banana", "a"), Is.EqualTo(3));

/// <summary>
/// Verifies counting of XML special characters as used by <c>WriteEscapedXmlString</c>.
/// </summary>
[TestCase("<hello><world>", "<", 2)]
[TestCase("<hello><world>", ">", 2)]
[TestCase("a&amp;&amp;b", "&", 2)]
public void CountSubstringsXmlSpecialCharsReturnsCorrectCount(string text, string substring, int expected)
=> Assert.That(CountSubstrings(text, substring), Is.EqualTo(expected));

/// <summary>
/// Verifies that the CDATA end token <c>]]&gt;</c> is counted correctly as a multi-character substring.
/// </summary>
[Test]
public void CountSubstringsCdataEndReturnsCorrectCount()
=> Assert.That(CountSubstrings("foo]]>bar]]>baz", "]]>"), Is.EqualTo(2));

/// <summary>
/// Verifies that non-overlapping occurrences are counted and overlapping ones are not,
/// consistent with the method's documented assumption.
/// </summary>
[Test]
public void CountSubstringsNonOverlappingReturnsCorrectCount()
=> Assert.That(CountSubstrings("aaaa", "aa"), Is.EqualTo(2));

/// <summary>
/// Verifies that a substring equal to the entire text counts as one match.
/// </summary>
[Test]
public void CountSubstringsSubstringEqualsTextReturnsOne()
=> Assert.That(CountSubstrings("]]>", "]]>"), Is.EqualTo(1));
#endregion
}
57 changes: 39 additions & 18 deletions src/log4net/Util/Transform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
//
#endregion

using System.Xml;
using System;
using System.Text.RegularExpressions;
using System.Xml;

namespace log4net.Util;

Expand Down Expand Up @@ -110,9 +111,15 @@ public static void WriteEscapedXmlString(this XmlWriter writer, string textData,
/// This method replaces any illegal characters in the input string
/// with the mask string specified.
/// </para>
/// <para>
/// Supplementary characters (U+10000–U+10FFFF) encoded as valid UTF-16 surrogate
/// pairs are preserved; only lone surrogates and other illegal code units are masked.
/// </para>
/// </remarks>
public static string MaskXmlInvalidCharacters(string textData, string mask)
=> _invalidchars.Replace(textData, mask);
public static string MaskXmlInvalidCharacters(string textData, string mask)
=> _invalidChars.Replace(textData, m =>
// A valid surrogate pair represents a legal supplementary character — preserve it.
m.Value.Length == 2 ? m.Value : mask);

/// <summary>
/// Count the number of times that the substring occurs in the text
Expand All @@ -127,29 +134,38 @@ public static string MaskXmlInvalidCharacters(string textData, string mask)
/// </remarks>
private static int CountSubstrings(string text, string substring)
{
int count = 0;
int offset = 0;
int length = text.Length;
int substringLength = substring.Length;

if (length == 0)
if (text.Length == 0 || substring.Length == 0)
{
return 0;
}
if (substringLength == 0)

// Use the char overload for single-character substrings — avoids string
// comparison overhead; all current callers pass single ASCII characters.
if (substring.Length == 1)
{
return 0;
int charCount = 0;
char target = substring[0];
foreach (char c in text)
{
if (c == target)
{
charCount++;
}
}
return charCount;
}

while (offset < length)
int count = 0;
int offset = 0;
int substringLength = substring.Length;
while (offset < text.Length)
{
int index = text.IndexOf(substring, offset);

// Ordinal avoids culture-sensitive comparison for these ASCII-only tokens.
int index = text.IndexOf(substring, offset, StringComparison.Ordinal);
if (index == -1)
{
break;
}

count++;
offset = index + substringLength;
}
Expand All @@ -160,7 +176,12 @@ private static int CountSubstrings(string text, string substring)
private const string CdataUnescapableToken = "]]";

/// <summary>
/// Characters illegal in XML 1.0
/// Matches either a valid UTF-16 surrogate pair (preserved) or a single character
/// that is illegal in XML 1.0 (replaced). The surrogate-pair alternative must come
/// first so the engine consumes both code units together before the single-char
/// alternative can match them individually.
/// </summary>
private static readonly Regex _invalidchars = new(@"[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD]", RegexOptions.Compiled);
}
private static readonly Regex _invalidChars = new(
@"[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD]",
RegexOptions.Compiled);
}
Loading