diff --git a/XmppSharp.Expat/ExpatXmppParser.cs b/XmppSharp.Expat/ExpatXmppParser.cs index 764fad1..fc23371 100644 --- a/XmppSharp.Expat/ExpatXmppParser.cs +++ b/XmppSharp.Expat/ExpatXmppParser.cs @@ -1,20 +1,19 @@ -using System.Text; -using System.Text.RegularExpressions; +using System.Text.RegularExpressions; using System.Xml; -using Expat; using XmppSharp.Dom; +using XmppSharp.Expat; using XmppSharp.Exceptions; using XmppSharp.Factory; using XmppSharp.Protocol.Base; -namespace XmppSharp.Parsers; +namespace XmppSharp.Parser; /// /// An enhanced XMPP parser built using Expat library. /// public partial class ExpatXmppParser : BaseXmppParser { - private Parser _parser; + private ExpatParser _parser; private Element _currentElem; private XmlNamespaceManager _nsStack; private NameTable _xmlNames; @@ -33,26 +32,26 @@ void AddNamespacesToScope(IReadOnlyDictionary attrs) } } - public ExpatXmppParser(ExpatEncodingType encoding = ExpatEncodingType.Utf8) + public ExpatXmppParser(EncodingType encoding = EncodingType.UTF8) { this._nsStack = new(this._xmlNames = new NameTable()); - this._parser = new Parser(encoding); + this._parser = new ExpatParser(encoding); - this._parser.OnElementStart += e => + this._parser.OnElementStart += (name, attributes) => { this._nsStack.PushScope(); - AddNamespacesToScope(e.Attributes); + AddNamespacesToScope(attributes); - var qname = Xml.ExtractQualifiedName(e.Name); + var qname = Xml.ExtractQualifiedName(name); var ns = this._nsStack.LookupNamespace(qname.HasPrefix ? qname.Prefix : string.Empty); - if (e.Name is "iq" or "message" or "presence") // work-around + if (name is "iq" or "message" or "presence") // work-around ns ??= Namespace.Client; - var element = ElementFactory.Create(e.Name, ns); + var element = ElementFactory.Create(name, ns); //foreach (var (key, value) in _nsStack.GetNamespacesInScope(XmlNamespaceScope.Local)) //{ @@ -60,10 +59,10 @@ public ExpatXmppParser(ExpatEncodingType encoding = ExpatEncodingType.Utf8) // element.SetAttribute(att, value); //} - foreach (var (key, value) in e.Attributes) + foreach (var (key, value) in attributes) element.SetAttribute(key, value); - if (e.Name == "stream:stream") + if (name == "stream:stream") AsyncHelper.RunSync(() => FireStreamStart(element as StreamStream)); else { @@ -72,11 +71,11 @@ public ExpatXmppParser(ExpatEncodingType encoding = ExpatEncodingType.Utf8) } }; - this._parser.OnElementEnd += e => + this._parser.OnElementEnd += (name) => { this._nsStack.PopScope(); - if (e.Value == "stream:stream") + if (name == "stream:stream") AsyncHelper.RunSync(() => FireStreamEnd()); else { @@ -86,10 +85,10 @@ public ExpatXmppParser(ExpatEncodingType encoding = ExpatEncodingType.Utf8) AsyncHelper.RunSync(() => FireStreamElement(_currentElem)); else { - if (e.Value != _currentElem.TagName) + if (name != _currentElem.TagName) { var ex = new JabberStreamException(StreamErrorCondition.InvalidXml, "Parent end tag mismatch."); - ex.Data.Add("Actual", e.Value); + ex.Data.Add("Actual", name); ex.Data.Add("Expected", _currentElem.TagName); throw ex; } @@ -99,36 +98,34 @@ public ExpatXmppParser(ExpatEncodingType encoding = ExpatEncodingType.Utf8) } }; - this._parser.OnText += e => + this._parser.OnText += (type, text) => { - if (_currentElem != null) + if (_currentElem == null) + return; + + if (type == ContentNodeType.Text) { - var trimWS = _currentElem.GetAttribute("xml:space") != "preserve"; + var trimWhitespace = _currentElem.GetAttribute("xml:space") != "preserve"; - // skip whitespace if not explicit declared. - if (string.IsNullOrWhiteSpace(e.Value) && trimWS) + if (trimWhitespace && text.All(XmlConvert.IsWhitespaceChar)) return; - var val = e.Value; - - if (trimWS) // same for trailing whitespace - val = TrimWhitespace(val); + if (trimWhitespace) // same for trailing whitespace + text = TrimWhitespace(text); - if (_currentElem.LastNode is Text text) - text.Value += val; + if (_currentElem.LastNode is Text node) + node.Value += text; else - _currentElem.AddChild(new Text(val)); + _currentElem.AddChild(new Text(text)); + } + else if (type == ContentNodeType.Cdata) + { + this._currentElem.AddChild(new Cdata(text)); + } + else if (type == ContentNodeType.Comment) + { + this._currentElem.AddChild(new Comment(text)); } - }; - - this._parser.OnCdata += e => - { - this._currentElem?.AddChild(new Cdata(e.Value)); - }; - - this._parser.OnComment += e => - { - this._currentElem?.AddChild(new Comment(e.Value)); }; } @@ -165,34 +162,10 @@ public void Reset() this._parser.Reset(); } - public void Write(byte[] buffer, int offset, int length, bool isFinalBlock = false) - { - this.EnsureNotDisposed(); - - byte[] temp; - - try - { - temp = GC.AllocateUninitializedArray(length, true); - Buffer.BlockCopy(buffer, offset, temp, 0, length); - this._parser.Feed(temp, length, isFinalBlock); - } - finally - { - temp = null; - } - } - public void Write(byte[] buffer, int length, bool isFinalBlock = false) { this.EnsureNotDisposed(); - this._parser.Feed(buffer, length, isFinalBlock); - } - - public void Write(byte[] buffer, bool isFinalBlock = false) - { - this.EnsureNotDisposed(); - this._parser.Feed(buffer, buffer.Length, isFinalBlock); + //this._parser.WriteBuffer(buffer, length, isFinalBlock); } protected override void Disposing() diff --git a/XmppSharp.Expat/PInvoke.cs b/XmppSharp.Expat/PInvoke.cs new file mode 100644 index 0000000..6eb9544 --- /dev/null +++ b/XmppSharp.Expat/PInvoke.cs @@ -0,0 +1,253 @@ +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace XmppSharp.Expat; + +public delegate void ExpatDeclarationHandler(string version, string? encoding, bool? standalone); +public delegate void ExpatStartElementHandler(string name, IReadOnlyDictionary attributes); +public delegate void ExpatContentElementHandler(ContentNodeType type, string data); +public delegate void ExpatEndElementHandler(string name); + +public class ExpatParser : IDisposable +{ + protected nint m_CPointer; + private volatile bool m_bDisposed; + private readonly string m_EncodingName; + + public event ExpatDeclarationHandler OnProlog; + public event ExpatStartElementHandler OnElementStart; + public event ExpatContentElementHandler OnText; + public event ExpatEndElementHandler OnElementEnd; + + public ExpatParser(EncodingType encoding) + { + m_EncodingName = encoding switch + { + EncodingType.ASCII => "US-ASCII", + EncodingType.ISO88591 => "", + EncodingType.UTF16 => "UTF-16", + EncodingType.UTF8 or _ => "UTF-8" + }; + + m_CPointer = PInvoke.XML_ParserCreate(m_EncodingName); + } + + public void Reset() + { + + } + + public void Dispose() + { + if (!m_bDisposed) + { + m_bDisposed = true; + } + } +} + +public class ExpatFeature +{ + public Feature Type { get; init; } + public string Name { get; init; } + public long Value { get; init; } + + struct SFeatureInfo + { + public Feature f; + public nint n; + public long v; + } + + public static IEnumerable Features { get; private set; } + + [ModuleInitializer] + internal static unsafe void Init() + { + var result = new List(); + + var featureList = (SFeatureInfo*)PInvoke.XML_GetFeatureList(); + + while (featureList->f != Feature.XML_FEATURE_END) + { + result.Add(new ExpatFeature + { + Type = featureList->f, + Name = Marshal.PtrToStringAnsi(featureList->n), + Value = featureList->v + }); + + featureList++; + } + + Features = result.AsReadOnly(); + } +} + +public enum EncodingType +{ + ASCII, + UTF8, + UTF16, + ISO88591, +} + +public enum ContentNodeType +{ + Text, + Comment, + Cdata, +} + +[UnmanagedFunctionPointer(CallingConvention.Cdecl)] +public delegate void PrologHandler(nint userData, [In] nint encoding, [In] nint version, int standalone); + +[UnmanagedFunctionPointer(CallingConvention.Cdecl)] +public delegate void StartElementHandler(nint userData, [In] nint name, [In] nint attributes); + +[UnmanagedFunctionPointer(CallingConvention.Cdecl)] +public delegate void EndElementHandler(nint userData, [In] nint name); + +// data is not 0 terminated +[UnmanagedFunctionPointer(CallingConvention.Cdecl)] +public delegate void CharacterDataHandler(nint userData, [In] nint data, [MarshalAs(UnmanagedType.I4)] int len); + +// target and data are 0 terminated +[UnmanagedFunctionPointer(CallingConvention.Cdecl)] +public delegate void ProcessingInstructionHandler(nint userData, [In] nint target, [In] nint data); + +// data is 0 terminated +[UnmanagedFunctionPointer(CallingConvention.Cdecl)] +public delegate void CommentHandler(nint userData, [In] nint data); + +[UnmanagedFunctionPointer(CallingConvention.Cdecl)] +public delegate void CdataSectionHandler(nint userData); + +public static unsafe class PInvoke +{ + const string LibraryName = "libexpat"; + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern nint XML_ParserCreate([In] string encoding); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern void XML_ParserFree(nint parser); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern void XML_SetXmlDeclHandler(nint parser, PrologHandler pHandler); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.I1)] + public static extern bool XML_ParserReset(nint parser, [In, Optional] string encoding); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern void XML_SetStartElementHandler(nint parser, [In] StartElementHandler handler); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern void XML_SetEndElementHandler(nint parser, [In] EndElementHandler handler); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern void XML_SetCharacterDataHandler(nint parser, [In] CharacterDataHandler handler); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern void XML_SetCommentHandler(nint parser, [In] CommentHandler handler); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern void XML_SetCdataSectionHandler(nint parser, [In] CdataSectionHandler start, [In] CdataSectionHandler end); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.LPStr)] + public static extern string XML_ErrorString(Error code); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + [return: MarshalAs(UnmanagedType.LPStr)] + public static extern string XML_ExpatVersion(); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern nint XML_GetFeatureList(); +} + +public enum Feature : byte +{ + XML_FEATURE_END = 0, + XML_FEATURE_UNICODE, + XML_FEATURE_UNICODE_WCHAR_T, + XML_FEATURE_DTD, + XML_FEATURE_CONTEXT_BYTES, + XML_FEATURE_MIN_SIZE, + XML_FEATURE_SIZEOF_XML_CHAR, + XML_FEATURE_SIZEOF_XML_LCHAR, + XML_FEATURE_NS, + XML_FEATURE_LARGE_SIZE, + XML_FEATURE_ATTR_INFO, + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT +} + +public class ExpatException : Exception +{ + public Error Code { get; } + + public ExpatException(Error code) : base(PInvoke.XML_ErrorString(code)) + => Code = code; + + public ExpatException(Error code, string message) : base(message) + { + Code = code; + } +} + +public enum Status +{ + XML_STATUS_ERROR = 0, + XML_STATUS_OK = 1, + XML_STATUS_SUSPENDED = 2 +} + +public enum Error +{ + XML_ERROR_NONE, + XML_ERROR_NO_MEMORY, + XML_ERROR_SYNTAX, + XML_ERROR_NO_ELEMENTS, + XML_ERROR_INVALID_TOKEN, + XML_ERROR_UNCLOSED_TOKEN, + XML_ERROR_PARTIAL_CHAR, + XML_ERROR_TAG_MISMATCH, + XML_ERROR_DUPLICATE_ATTRIBUTE, + XML_ERROR_JUNK_AFTER_DOC_ELEMENT, + XML_ERROR_PARAM_ENTITY_REF, + XML_ERROR_UNDEFINED_ENTITY, + XML_ERROR_RECURSIVE_ENTITY_REF, + XML_ERROR_ASYNC_ENTITY, + XML_ERROR_BAD_CHAR_REF, + XML_ERROR_BINARY_ENTITY_REF, + XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, + XML_ERROR_MISPLACED_XML_PI, + XML_ERROR_UNKNOWN_ENCODING, + XML_ERROR_INCORRECT_ENCODING, + XML_ERROR_UNCLOSED_CDATA_SECTION, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + XML_ERROR_NOT_STANDALONE, + XML_ERROR_UNEXPECTED_STATE, + XML_ERROR_ENTITY_DECLARED_IN_PE, + XML_ERROR_FEATURE_REQUIRES_XML_DTD, + XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING, + XML_ERROR_UNBOUND_PREFIX, + XML_ERROR_UNDECLARING_PREFIX, + XML_ERROR_INCOMPLETE_PE, + XML_ERROR_XML_DECL, + XML_ERROR_TEXT_DECL, + XML_ERROR_PUBLICID, + XML_ERROR_SUSPENDED, + XML_ERROR_NOT_SUSPENDED, + XML_ERROR_ABORTED, + XML_ERROR_FINISHED, + XML_ERROR_SUSPEND_PE, + XML_ERROR_RESERVED_PREFIX_XML, + XML_ERROR_RESERVED_PREFIX_XMLNS, + XML_ERROR_RESERVED_NAMESPACE_URI, + XML_ERROR_INVALID_ARGUMENT, + XML_ERROR_NO_BUFFER, + XML_ERROR_AMPLIFICATION_LIMIT_BREACH +}; \ No newline at end of file diff --git a/XmppSharp.Expat/XmppSharp.Expat.csproj b/XmppSharp.Expat/XmppSharp.Expat.csproj index 89562d4..1028676 100644 --- a/XmppSharp.Expat/XmppSharp.Expat.csproj +++ b/XmppSharp.Expat/XmppSharp.Expat.csproj @@ -4,6 +4,7 @@ net8.0;net7.0 enable annotations + true @@ -35,10 +36,6 @@ LICENSE - - - - diff --git a/XmppSharp.Test/ExpatParserTests.cs b/XmppSharp.Test/ExpatParserTests.cs deleted file mode 100644 index 39cee07..0000000 --- a/XmppSharp.Test/ExpatParserTests.cs +++ /dev/null @@ -1,361 +0,0 @@ -using System.IO.Compression; -using Expat; -using XmppSharp.Dom; -using XmppSharp.Parsers; -using XmppSharp.Protocol.Base; - -namespace XmppSharp.Test; - -[TestClass] -public class ExpatParserTests -{ - [TestMethod] - public async Task ParseFromBuffer() - { - string xml = ""; - - using var parser = new ExpatXmppParser(ExpatEncodingType.Utf8); - - var tcs = new TaskCompletionSource(); - - parser.OnStreamElement += e => - { - tcs.TrySetResult(e); - return Task.CompletedTask; - }; - - parser.Write(xml.GetBytes(), true); - - var result = await tcs.Task; - - Console.WriteLine("XML:\n" + result.ToString(XmlFormatting.None)); - } - - [TestMethod] - public async Task ParseStreamError() - { - // Not declared: xmlns:stream="http://etherx.jabber.org/streams" - // but expat can parse even with missing namespace declaration 😛 - // test output will be: - - /* - - Element: stream:error - Element: bad-namespace-prefix - Attribute: xmlns=urn:ietf:params:xml:ns:xmpp-streams - - */ - - var xml = @" - -"; - - using var parser = new ExpatXmppParser(); - - var tcs = new TaskCompletionSource(); - - parser.OnStreamElement += e => - { - tcs.TrySetResult(e); - return Task.CompletedTask; - }; - - using var stream = new MemoryStream(xml.GetBytes()); - - stream.Position = 0; - - _ = Task.Run(async () => - { - // simulate IO - - try - { - var buf = new byte[16]; - int cnt; - - while (true) - { - cnt = await stream.ReadAsync(buf); - parser.Write(buf, cnt, cnt == 0); - - if (cnt == 0) - break; - } - } - catch (Exception e) - { - tcs.TrySetException(e); - } - }); - - _ = Task.Delay(3000).ContinueWith(_ => tcs.TrySetCanceled()); - - var element = await tcs.Task; - - Assert.IsNotNull(element); - Assert.AreEqual("stream:error", element.TagName); - - Dump(element); - } - - [TestMethod] - public async Task ParseStreamStart() - { - var xml = new StreamStream - { - From = "localhost", - To = "user1", - Id = Guid.NewGuid().ToString(), - Version = "1.0", - Language = "en" - }.StartTag(); - - using var parser = new ExpatXmppParser(); - - var tcs = new TaskCompletionSource(); - - parser.OnStreamStart += e => - { - tcs.TrySetResult(e); - return Task.CompletedTask; - }; - - using var stream = new MemoryStream(xml.GetBytes()); - stream.Position = 0; - - _ = Task.Run(async () => - { - // simulate IO - - try - { - var buf = new byte[16]; - int cnt; - - while (true) - { - cnt = await stream.ReadAsync(buf); - parser.Write(buf, cnt, cnt == 0); - - if (cnt == 0) - break; - } - } - catch (Exception e) - { - tcs.TrySetException(e); - } - }); - - _ = Task.Delay(3000).ContinueWith(_ => tcs.TrySetCanceled()); - - var element = await tcs.Task; - - Console.WriteLine("XML:\n" + element.ToString(XmlFormatting.Indented)); - } - - [TestMethod] - public async Task ParseFromString() - { - // .NET XmlReader consider those whitespaces after XML Decl invalid and cannot parse, while expat just skip since it's still a well-formed XML. - - var xml = @" - - - - - - - - - - - - - - - - - - - - - - - - -"; - - using var parser = new ExpatXmppParser(); - - var tcs = new TaskCompletionSource(); - - parser.OnStreamElement += e => - { - tcs.TrySetResult(e); - return Task.CompletedTask; - }; - - using var stream = new MemoryStream(xml.GetBytes()); - stream.Position = 0; - - _ = Task.Run(async () => - { - // simulate IO - - try - { - var buf = new byte[16]; - int cnt; - - while (true) - { - cnt = await stream.ReadAsync(buf); - parser.Write(buf, cnt, cnt == 0); - - if (cnt == 0) - break; - } - } - catch (Exception e) - { - tcs.TrySetException(e); - } - }); - - _ = Task.Delay(3000).ContinueWith(_ => tcs.TrySetCanceled()); - - var element = await tcs.Task; - - Console.WriteLine("XML:\n" + element.ToString(XmlFormatting.Indented)); - } - - [TestMethod] - public async Task ParseFromZipFile() - { - using var fs = File.OpenRead(Path.Combine(Directory.GetCurrentDirectory(), "zipfile.zip")); - using var archive = new ZipArchive(fs, ZipArchiveMode.Read); - - var entry = archive.GetEntry("snippet.xml"); - Assert.IsNotNull(entry); - - using var parser = new ExpatXmppParser(); - - var tcs = new TaskCompletionSource(); - - parser.OnStreamElement += e => - { - tcs.TrySetResult(e); - return Task.CompletedTask; - }; - - using var stream = entry.Open(); - - _ = Task.Run(async () => - { - // simulate IO - - var buf = new byte[entry.Length / 8]; - int cnt; - - while (true) - { - cnt = await stream.ReadAsync(buf); - parser.Write(buf, cnt, cnt == 0); - - if (cnt == 0) - break; - } - }); - - _ = Task.Delay(3000).ContinueWith(_ => tcs.TrySetCanceled()); - - var element = await tcs.Task; - - Assert.AreEqual("CodeSnippets", element.TagName); - Assert.AreEqual("http://schemas.microsoft.com/VisualStudio/2005/CodeSnippet", element.DefaultNamespace); - Assert.AreEqual("CodeSnippet", element.FirstChild.TagName); - - Console.WriteLine("XML:\n" + element.ToString(XmlFormatting.None) + "\n"); - - Dump(element); - } - - [TestMethod] - public async Task ParseRealSample() - { - var xml = @" - - - -"; - - using var parser = new ExpatXmppParser(); - - var tcs = new TaskCompletionSource(); - - parser.OnStreamElement += e => - { - tcs.TrySetResult(e); - return Task.CompletedTask; - }; - - using var stream = new MemoryStream(xml.GetBytes()); - stream.Position = 0; - - _ = Task.Run(async () => - { - // simulate IO - - try - { - var buf = new byte[16]; - int cnt; - - while (true) - { - cnt = await stream.ReadAsync(buf); - parser.Write(buf, cnt, cnt == 0); - - if (cnt == 0) - break; - } - } - catch (Exception e) - { - tcs.TrySetException(e); - } - }); - - _ = Task.Delay(3000).ContinueWith(_ => tcs.TrySetCanceled()); - - var element = await tcs.Task; - - Console.WriteLine("XML:\n" + element.ToString(XmlFormatting.Indented)); - } - - static void Dump(Element e, int depth = 0) - { - var tab = new string(' ', depth); - - Console.Write(tab + "Element: " + e.TagName); - - tab = new string(' ', depth + 1); - - if (string.IsNullOrWhiteSpace(e.Value)) - Console.WriteLine(); - else - Console.WriteLine(" (value: {0})", e.Value); - - foreach (var (key, value) in e.Attributes()) - { - if (key == "xmlns " && value == e.Parent?.GetAttribute(key)) - continue; - - Console.WriteLine(tab + "Attribute: {0}={1}", key, value); - } - - foreach (var child in e.Children()) - Dump(child, depth + 3); - } -} diff --git a/XmppSharp.Test/ParserTests.cs b/XmppSharp.Test/ParserTests.cs index 404954d..ba37ce3 100644 --- a/XmppSharp.Test/ParserTests.cs +++ b/XmppSharp.Test/ParserTests.cs @@ -2,7 +2,7 @@ using System.IO.Compression; using System.Runtime.CompilerServices; using XmppSharp.Dom; -using XmppSharp.Parsers; +using XmppSharp.Parser; namespace XmppSharp.Test; @@ -15,7 +15,7 @@ internal static async Task ParseFromBuffer(string xml, [CallerMemberNam stream.Write(xml.GetBytes()); stream.Position = 0; - using var parser = new DefaultXmppParser(stream, bufferSize: 16); + using var parser = new XmppStreamParser(stream); var tcs = new TaskCompletionSource(); @@ -159,7 +159,7 @@ public async Task ParseWithInputStream() await ms.WriteAsync("".GetBytes()); ms.Position = 0; - using var parser = new DefaultXmppParser(ms); + using var parser = new XmppStreamParser(ms); Element el = default!; @@ -188,7 +188,7 @@ public async Task ParseWithInputStreamAndCompetition() await ms.WriteAsync("".GetBytes()); ms.Position = 0; - using var parser = new DefaultXmppParser(ms); + using var parser = new XmppStreamParser(ms); var tcs = new TaskCompletionSource(); @@ -232,7 +232,7 @@ public async Task ParseWithFactoryStream() await ms.WriteAsync("".GetBytes()); ms.Position = 0; - using var parser = new DefaultXmppParser(() => ms); + using var parser = new XmppStreamParser(() => ms); var el = await parser.GetNextElementAsync(); Console.WriteLine("parser::advance(): false"); @@ -252,7 +252,7 @@ public async Task ParseFromZipEntry() using var stream = entry.Open(); - using var parser = new DefaultXmppParser(stream); + using var parser = new XmppStreamParser(stream); var element = await parser.GetNextElementAsync(); Assert.AreEqual("CodeSnippets", element.TagName); diff --git a/XmppSharp/CHANGELOG.md b/XmppSharp/CHANGELOG.md index b8e9610..35f4598 100644 --- a/XmppSharp/CHANGELOG.md +++ b/XmppSharp/CHANGELOG.md @@ -55,16 +55,21 @@ ____ *3.1.5* -- Rename `XmppParser` to `DefaultXmppParser` to indicade this uses regular .NET `XmlReader` to parse xmpp packets. - Add basic abstraction layer to implement your own xmpp parser. Also i'm releasing a separated package `XmppSharp.Expat` to provide expat XMPP parser implementation. (Note: You must install native libraries to use expat. - Added `AsyncHelper` (from `AspNetCore` repo) to calling async functions in sync methods. -> In **XMPP#** repository i did an github actions to automatically build expat using vcpkg with most common systems: ubuntu, macos, windows but only x64 is working at this moment). Consider using [XmppShap.Expat](https://www.nuget.org/packages/XmppSharp.Expat/) package too if you need a fast and stable parser. - ____ *3.1.6* - +____ - Minor improvements. - Fixed wrong indent chars & side for default formatting options. -- Fixed `Element.Value` returning entire inner text from all descendant nodes. \ No newline at end of file +- Fixed `Element.Value` returning entire inner text from all descendant nodes. + +*3.1.7* +____ +- Minor improvements. +- Renamed `DefaultXmppParser` -> `XmppStreamParser` +- Added helper methods for fast loading XML from string and streams. +- Improvements to help detect when parser really completed parsing. +- Added missing TimeSpan parser in `TryParseHelpers`. \ No newline at end of file diff --git a/XmppSharp/Parsers/BaseXmppParser.cs b/XmppSharp/Parsers/BaseXmppParser.cs index 403ba5a..f097dab 100644 --- a/XmppSharp/Parsers/BaseXmppParser.cs +++ b/XmppSharp/Parsers/BaseXmppParser.cs @@ -1,8 +1,11 @@ using XmppSharp.Factory; using XmppSharp.Protocol.Base; -namespace XmppSharp.Parsers; +namespace XmppSharp.Parser; +/// +/// Base class to implement an XMPP parser. +/// public abstract class BaseXmppParser : IDisposable { /// @@ -36,6 +39,9 @@ protected void EnsureNotDisposed() throw new ObjectDisposedException(GetType().FullName); } + /// + /// Method that is called when disposing the parser. + /// protected virtual void Disposing() { @@ -43,11 +49,12 @@ protected virtual void Disposing() public void Dispose() { - if (_disposed) - return; - - _disposed = true; - Disposing(); + if (!_disposed) + { + _disposed = true; + Disposing(); + GC.SuppressFinalize(this); + } } protected async Task FireStreamStart(StreamStream e) diff --git a/XmppSharp/Parsers/DefaultXmppParser.cs b/XmppSharp/Parsers/XmppStreamParser.cs similarity index 69% rename from XmppSharp/Parsers/DefaultXmppParser.cs rename to XmppSharp/Parsers/XmppStreamParser.cs index a6f77a0..06de00d 100644 --- a/XmppSharp/Parsers/DefaultXmppParser.cs +++ b/XmppSharp/Parsers/XmppStreamParser.cs @@ -6,16 +6,15 @@ using XmppSharp.Factory; using XmppSharp.Protocol.Base; -namespace XmppSharp.Parsers; +namespace XmppSharp.Parser; /// /// An default XMPP parser implemented on top of . /// -public class DefaultXmppParser : BaseXmppParser +public class XmppStreamParser : BaseXmppParser { private XmlReader _reader; - private StreamReader _textReader; private NameTable _nameTable = new(); private volatile bool _disposed; @@ -24,25 +23,12 @@ public class DefaultXmppParser : BaseXmppParser private Func _streamFactory; private Stream _baseStream; - private readonly Encoding _encoding; - private readonly int _bufferSize; - - public const int DefaultBufferSize = 256; - - DefaultXmppParser(Encoding? encoding, int bufferSize) - { - this._encoding = encoding ?? Encoding.UTF8; - this._bufferSize = bufferSize <= 0 ? DefaultBufferSize : bufferSize; - } - /// - /// Initializes a new instance of . Use this constructor for generic purposes, where the base type of the stream will not change (eg: loading from file). + /// Initializes a new instance of . Use this constructor for generic purposes, where the base type of the stream will not change (eg: loading from file). /// /// Stream that will be used to read the characters. /// Determines whether the stream should remain open after dispose this parser. - /// Determines which type of character encoding to be used. (Default: ) - /// Buffer size in chars for the internal . (Default: ) - public DefaultXmppParser(Stream stream, bool leaveOpen = true, Encoding? encoding = default, int bufferSize = -1) : this(encoding, bufferSize) + public XmppStreamParser(Stream stream, bool leaveOpen = true) { Require.NotNull(stream); @@ -54,12 +40,11 @@ public DefaultXmppParser(Stream stream, bool leaveOpen = true, Encoding? encodin } /// - /// Initializes a new instance of . Use this constructor only if the stream can change according to the connection state (eg: connection upgrade from raw stream to ssl stream). + /// Initializes a new instance of . Use this constructor only if the stream can change according to the connection state (eg: connection upgrade from raw stream to ssl stream). /// /// Factory function to get the stream when is called. - /// Determines which type of character encoding to be used. (Default: ) - /// Buffer size in chars for the internal . (Default: ) - public DefaultXmppParser(Func streamFactory, Encoding? encoding = default, int bufferSize = -1) : this(encoding, bufferSize) + /// Determines whether the stream should remain open after dispose this parser. + public XmppStreamParser(Func streamFactory, bool leaveOpen = true) { Require.NotNull(streamFactory); @@ -76,18 +61,15 @@ protected override void Disposing() this._disposed = true; - if (this._isFromFactory) - this._streamFactory = null; - else - { - if (!this._leaveOpen) - this._baseStream?.Dispose(); + if (!this._leaveOpen) + this._baseStream?.Dispose(); - this._baseStream = null; - } + this._baseStream = null; + this._streamFactory = null; this._reader?.Dispose(); - this._textReader?.Dispose(); + this._reader = null; + this._nameTable = null; } @@ -108,11 +90,10 @@ internal class ThrowingResolver : XmlResolver /// /// Restarts the state of the XML parser. /// - /// If this instance of has already been disposed. + /// If this instance of has already been disposed. public virtual void Reset() { this._reader?.Dispose(); - this._textReader?.Dispose(); #if NET7_0_OR_GREATER ObjectDisposedException.ThrowIf(this._disposed, this); @@ -120,18 +101,21 @@ public virtual void Reset() if (this._disposed) throw new ObjectDisposedException(GetType().FullName, "Cannot reset parser in a disposed parser."); #endif - this._textReader = new StreamReader(this._isFromFactory - ? this._streamFactory() - : this._baseStream, this._encoding, false, this._bufferSize, true); - this._reader = XmlReader.Create(this._textReader, new() + if (this._isFromFactory) + this._baseStream = this._streamFactory(); + + this._reader = XmlReader.Create(this._baseStream, new() { CloseInput = false, Async = true, IgnoreWhitespace = true, IgnoreProcessingInstructions = true, - ConformanceLevel = ConformanceLevel.Auto, - DtdProcessing = DtdProcessing.Prohibit, + ConformanceLevel = ConformanceLevel.Fragment, + + // More info: https://en.wikipedia.org/wiki/Billion_laughs_attack + DtdProcessing = DtdProcessing.Ignore, + #if NET7_0_OR_GREATER XmlResolver = XmlResolver.ThrowingResolver, #else @@ -165,7 +149,7 @@ public int Depth } public virtual bool Advance() - => AsyncHelper.RunSync(() => AdvanceAsync()); + => AsyncHelper.RunSync(AdvanceAsync); public virtual async Task AdvanceAsync() { @@ -175,9 +159,6 @@ public virtual async Task AdvanceAsync() if (this._reader == null) return false; - if (this._reader.EOF) - return false; - bool result; try @@ -186,6 +167,9 @@ public virtual async Task AdvanceAsync() } catch (XmlException e) { + if (_reader.EOF) + return false; + throw new JabberStreamException(StreamErrorCondition.InvalidXml, e); } @@ -250,7 +234,7 @@ public virtual async Task AdvanceAsync() else { if (this._rootElem == null) - throw new JabberStreamException(StreamErrorCondition.InvalidXml, "The element in the current scope was not expected to be null."); + throw new JabberStreamException(StreamErrorCondition.InvalidXml, "Unexcepted end tag."); var parent = this._rootElem.Parent; diff --git a/XmppSharp/Require.cs b/XmppSharp/Require.cs index 74f8a63..36bdd5c 100644 --- a/XmppSharp/Require.cs +++ b/XmppSharp/Require.cs @@ -6,6 +6,30 @@ namespace XmppSharp; [StackTraceHidden] static class Require { + public static void NotDisposed(bool condition, object value) + { + NotNull(value); + +#if NET7_0_OR_GREATER + ObjectDisposedException.ThrowIf(condition, value); +#else + if (condition) + throw new ObjectDisposedException(value.GetType().FullName); +#endif + } + + public static void NotDisposed(bool condition, Type type) + { + NotNull(type); + +#if NET7_0_OR_GREATER + ObjectDisposedException.ThrowIf(condition, type); +#else + if (condition) + throw new ObjectDisposedException(type.FullName); +#endif + } + public static void NotNull(object value, [CallerArgumentExpression(nameof(value))] string expression = default) { if (value is null) diff --git a/XmppSharp/TryParseHelpers.cs b/XmppSharp/TryParseHelpers.cs index 110444d..4148538 100644 --- a/XmppSharp/TryParseHelpers.cs +++ b/XmppSharp/TryParseHelpers.cs @@ -18,22 +18,31 @@ namespace XmppSharp; /// public static class TryParseHelpers { + + static readonly Dictionary _typeMap = new() + { + [typeof(sbyte)] = Int8, + [typeof(byte)] = UInt8, + [typeof(short)] = Int16, + [typeof(int)] = Int32, + [typeof(long)] = Int64, + [typeof(ushort)] = UInt16, + [typeof(uint)] = UInt32, + [typeof(ulong)] = UInt64, + [typeof(float)] = Float, + [typeof(double)] = Double, + [typeof(bool)] = Boolean, + [typeof(Guid)] = Guid, + [typeof(DateTime)] = DateTime, + [typeof(DateTimeOffset)] = DateTimeOffset, + [typeof(TimeSpan)] = TimeSpan + }; + public static Delegate GetConverter(Type type) { - if (type == typeof(sbyte)) return Int8; - else if (type == typeof(byte)) return UInt8; - else if (type == typeof(short)) return Int16; - else if (type == typeof(int)) return Int32; - else if (type == typeof(long)) return Int64; - else if (type == typeof(ushort)) return UInt16; - else if (type == typeof(uint)) return UInt32; - else if (type == typeof(ulong)) return UInt64; - else if (type == typeof(float)) return Float; - else if (type == typeof(double)) return Double; - else if (type == typeof(bool)) return Boolean; - else if (type == typeof(Guid)) return Guid; - else if (type == typeof(DateTime)) return DateTime; - else if (type == typeof(DateTimeOffset)) return DateTimeOffset; + if (_typeMap.TryGetValue(type, out var func)) + return func; + else return null; } @@ -97,10 +106,23 @@ public static Delegate GetConverter(Type type) /// public static TryParseDelegate Guid { get; } = System.Guid.TryParse; + /// + /// Parser implementation for the type. + /// public static TryParseDelegate DateTime { get; } = System.DateTime.TryParse; + /// + /// Parser implementation for the type. + /// public static TryParseDelegate DateTimeOffset { get; } = System.DateTimeOffset.TryParse; + /// + /// Parser implementation for the type. + /// + public static TryParseDelegate TimeSpan { get; } = System.TimeSpan.TryParse; + + // ---------------------------------------------------------------------------------------------------------- // + static bool TryParseFloat(ReadOnlySpan span, out float result) => float.TryParse(span, NumberStyles.Float, CultureInfo.InvariantCulture, out result); diff --git a/XmppSharp/Utilities.cs b/XmppSharp/Utilities.cs index a4e911e..0544287 100644 --- a/XmppSharp/Utilities.cs +++ b/XmppSharp/Utilities.cs @@ -1,7 +1,7 @@ using System.Runtime.CompilerServices; using System.Text; using System.Xml; -using XmppSharp.Parsers; +using XmppSharp.Parser; namespace XmppSharp; @@ -32,7 +32,7 @@ public static string ToHex(this byte[] bytes, bool lowercase = true) return result.ToLowerInvariant(); } - public static async Task GetNextElementAsync(this DefaultXmppParser parser, CancellationToken token = default) + public static async Task GetNextElementAsync(this XmppStreamParser parser, CancellationToken token = default) { var tcs = new TaskCompletionSource(); @@ -74,7 +74,7 @@ public static async Task GetNextElementAsync(this DefaultXmppParser par return await tcs.Task; } - public static TaskAwaiter GetAwaiter(this DefaultXmppParser parser) + public static TaskAwaiter GetAwaiter(this XmppStreamParser parser) => parser.AdvanceAsync().GetAwaiter(); public static byte[] FromHex(this string str) diff --git a/XmppSharp/Xml.cs b/XmppSharp/Xml.cs index 79e7d13..39df60e 100644 --- a/XmppSharp/Xml.cs +++ b/XmppSharp/Xml.cs @@ -2,6 +2,7 @@ using System.Net.Http.Headers; using System.Text; using System.Xml; +using XmppSharp.Parser; namespace XmppSharp; @@ -36,14 +37,27 @@ public static XmlQualifiedName ExtractQualifiedName(string source) }; } + public static async Task FromStringAsync(string xml, CancellationToken token = default) + { + using (var ms = new MemoryStream(xml.GetBytes())) + using (var parser = new XmppStreamParser(ms)) + return await parser.GetNextElementAsync(token); + } + + public static async Task FromStreamAsync(Stream stream, CancellationToken token = default) + { + using (var parser = new XmppStreamParser(stream)) + return await parser.GetNextElementAsync(token); + } + internal static XmlWriter CreateWriter(StringBuilder output, XmlFormatting formatting) { Require.NotNull(output); var settings = new XmlWriterSettings { - Indent = formatting.IndentSize != 0, - IndentChars = formatting.IndentChars, + Indent = formatting.IndentSize > 0, + IndentChars = new string(formatting.IndentChar, formatting.IndentSize), DoNotEscapeUriAttributes = formatting.DoNotEscapeUriAttributes, WriteEndDocumentOnClose = formatting.WriteEndDocumentOnClose, NewLineHandling = formatting.NewLineHandling, @@ -52,8 +66,8 @@ internal static XmlWriter CreateWriter(StringBuilder output, XmlFormatting forma CloseOutput = true, ConformanceLevel = ConformanceLevel.Fragment, Encoding = Encoding.UTF8, - NamespaceHandling = NamespaceHandling.OmitDuplicates, - OmitXmlDeclaration = true, + NamespaceHandling = formatting.NamespaceHandling, + OmitXmlDeclaration = formatting.OmitXmlDeclaration, NewLineChars = formatting.NewLineChars }; diff --git a/XmppSharp/XmlFormatting.cs b/XmppSharp/XmlFormatting.cs index b2814c0..dd55098 100644 --- a/XmppSharp/XmlFormatting.cs +++ b/XmppSharp/XmlFormatting.cs @@ -12,9 +12,14 @@ namespace XmppSharp; public readonly struct XmlFormatting { /// - /// Determines whether to omit xmlns declarations in child elements. + /// Determines the behavior of declaring namespaces in the XML element. (Default: ) /// - public bool OmitDuplicatedNamespaces { get; init; } + public NamespaceHandling NamespaceHandling { get; init; } + + /// + /// Determines whether the XML output will omit the prologue, i.e. ]]>. (Default: ) + /// + public bool OmitXmlDeclaration { get; init; } /// /// Determines whether to escape the URI attributes. Default: . @@ -37,14 +42,14 @@ public readonly struct XmlFormatting public bool IncludeTextNodes { get; init; } /// - /// Determines the indentation size of the XML. Greater than zero means the XML will be formatted. + /// Determines whether the XML string will be formatted. (Default: ) /// public int IndentSize { get; init; } /// - /// Sets the character used for indentation. It can be a tab character ('\t') or a space (' ') as long as it complies with XML standards. + /// Sets the character used for indentation. (Default: ) /// - public string IndentChars { get; init; } + public char IndentChar { get; init; } /// /// Gets or sets a value that indicates whether the System.Xml.XmlWriter will add closing tags to all unclosed element tags when the method is called. @@ -52,7 +57,7 @@ public readonly struct XmlFormatting public bool WriteEndDocumentOnClose { get; init; } /// - /// Gets or sets a value indicating whether to normalize line breaks in the output. (Default: + /// Gets or sets a value indicating whether to normalize line breaks in the output. (Default: ) /// public NewLineHandling NewLineHandling { get; init; } @@ -76,11 +81,13 @@ public XmlFormatting() IncludeTextNodes = true; WriteEndDocumentOnClose = true; - OmitDuplicatedNamespaces = true; DoNotEscapeUriAttributes = false; + NamespaceHandling = NamespaceHandling.OmitDuplicates; + OmitXmlDeclaration = true; + IndentSize = 0; - IndentChars = " "; + IndentChar = (char)0; NewLineHandling = NewLineHandling.Replace; NewLineOnAttributes = false; @@ -98,5 +105,6 @@ public XmlFormatting() public static XmlFormatting Indented { get; } = None with { IndentSize = 4, + IndentChar = ' ' }; } diff --git a/XmppSharp/XmppSharp.csproj b/XmppSharp/XmppSharp.csproj index 16fe6df..ba51cf5 100644 --- a/XmppSharp/XmppSharp.csproj +++ b/XmppSharp/XmppSharp.csproj @@ -26,7 +26,7 @@ - 3.1.6 + 3.1.7 true nathan130200 git