diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj index 3241968439878..457f414bd8c24 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj +++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj @@ -1,4 +1,4 @@ - + System.Text.RegularExpressions $(DefineConstants);FEATURE_COMPILED @@ -7,8 +7,9 @@ enable - + + @@ -16,8 +17,8 @@ - + @@ -41,6 +42,7 @@ + @@ -56,7 +58,6 @@ Common\System\Text\ValueStringBuilder.cs - diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Capture.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Capture.cs index 6c5610b02c708..6cf13391110d9 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Capture.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Capture.cs @@ -2,11 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -// Capture is just a location/length pair that indicates the -// location of a regular expression match. A single regexp -// search may return multiple Capture within each capturing -// RegexGroup. - namespace System.Text.RegularExpressions { /// @@ -22,20 +17,13 @@ internal Capture(string text, int index, int length) Length = length; } - /// - /// Returns the position in the original string where the first character of - /// captured substring was found. - /// + /// Returns the position in the original string where the first character of captured substring was found. public int Index { get; private protected set; } - /// - /// Returns the length of the captured substring. - /// + /// Returns the length of the captured substring. 
public int Length { get; private protected set; } - /// - /// The original string - /// + /// The original string internal string Text { get; private protected set; } /// @@ -43,19 +31,13 @@ internal Capture(string text, int index, int length) /// public string Value => Text.Substring(Index, Length); - /// - /// Returns the substring that was matched. - /// + /// Returns the substring that was matched. public override string ToString() => Value; - /// - /// The substring to the left of the capture - /// - internal ReadOnlySpan GetLeftSubstring() => Text.AsSpan(0, Index); + /// The substring to the left of the capture + internal ReadOnlyMemory GetLeftSubstring() => Text.AsMemory(0, Index); - /// - /// The substring to the right of the capture - /// - internal ReadOnlySpan GetRightSubstring() => Text.AsSpan(Index + Length, Text.Length - Index - Length); + /// The substring to the right of the capture + internal ReadOnlyMemory GetRightSubstring() => Text.AsMemory(Index + Length, Text.Length - Index - Length); } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CaptureCollection.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CaptureCollection.cs index 6f4413bd88168..4f60548533e68 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CaptureCollection.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CaptureCollection.cs @@ -2,19 +2,12 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -// The CaptureCollection lists the captured Capture numbers -// contained in a compiled Regex. 
- using System.Collections; using System.Collections.Generic; using System.Diagnostics; namespace System.Text.RegularExpressions { - // This collection returns the Captures for a group - // in the order in which they were matched (left to right - // or right to left). It is created by Group.Captures. - /// /// Represents a sequence of capture substrings. The object is used /// to return the set of captures done by a single capturing group. @@ -35,36 +28,32 @@ internal CaptureCollection(Group group) public bool IsReadOnly => true; - /// - /// Returns the number of captures. - /// + /// Returns the number of captures. public int Count => _capcount; - /// - /// Returns a specific capture, by index, in this collection. - /// + /// Returns a specific capture, by index, in this collection. public Capture this[int i] => GetCapture(i); - /// - /// Provides an enumerator in the same order as Item[]. - /// + /// Provides an enumerator in the same order as Item[]. public IEnumerator GetEnumerator() => new Enumerator(this); IEnumerator IEnumerable.GetEnumerator() => new Enumerator(this); - /// - /// Returns the set of captures for the group - /// + /// Returns the set of captures for the group private Capture GetCapture(int i) { - if (i == _capcount - 1 && i >= 0) + if ((uint)i == _capcount - 1) + { return _group; + } if (i >= _capcount || i < 0) - throw new ArgumentOutOfRangeException(nameof(i)); + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.i); + } // first time a capture is accessed, compute them all - if (_captures == null) + if (_captures is null) { ForceInitialized(); Debug.Assert(_captures != null); @@ -91,8 +80,10 @@ internal void ForceInitialized() public void CopyTo(Array array, int arrayIndex) { - if (array == null) - throw new ArgumentNullException(nameof(array)); + if (array is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.array); + } for (int i = arrayIndex, j = 0; j < Count; i++, j++) { @@ -102,12 +93,18 @@ public 
void CopyTo(Array array, int arrayIndex) public void CopyTo(Capture[] array, int arrayIndex) { - if (array == null) - throw new ArgumentNullException(nameof(array)); - if (arrayIndex < 0 || arrayIndex > array.Length) - throw new ArgumentOutOfRangeException(nameof(arrayIndex)); + if (array is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.array); + } + if ((uint)arrayIndex > (uint)array.Length) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.arrayIndex); + } if (array.Length - arrayIndex < Count) + { throw new ArgumentException(SR.Arg_ArrayPlusOffTooSmall); + } for (int i = arrayIndex, j = 0; j < Count; i++, j++) { @@ -128,77 +125,57 @@ int IList.IndexOf(Capture item) return -1; } - void IList.Insert(int index, Capture item) - { + void IList.Insert(int index, Capture item) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - void IList.RemoveAt(int index) - { + void IList.RemoveAt(int index) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } Capture IList.this[int index] { - get { return this[index]; } - set { throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); } + get => this[index]; + set => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); } - void ICollection.Add(Capture item) - { + void ICollection.Add(Capture item) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - void ICollection.Clear() - { + void ICollection.Clear() => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } bool ICollection.Contains(Capture item) => ((IList)this).IndexOf(item) >= 0; - bool ICollection.Remove(Capture item) - { + bool ICollection.Remove(Capture item) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - int IList.Add(object? value) - { + int IList.Add(object? 
value) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - void IList.Clear() - { + void IList.Clear() => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } bool IList.Contains(object? value) => - value is Capture && ((ICollection)this).Contains((Capture)value); + value is Capture other && ((ICollection)this).Contains(other); int IList.IndexOf(object? value) => - value is Capture ? ((IList)this).IndexOf((Capture)value) : -1; + value is Capture other ? ((IList)this).IndexOf(other) : -1; - void IList.Insert(int index, object? value) - { + void IList.Insert(int index, object? value) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } bool IList.IsFixedSize => true; - void IList.Remove(object? value) - { + void IList.Remove(object? value) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - void IList.RemoveAt(int index) - { + void IList.RemoveAt(int index) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } object? 
IList.this[int index] { - get { return this[index]; } - set { throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); } + get => this[index]; + set => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); } private sealed class Enumerator : IEnumerator @@ -219,7 +196,9 @@ public bool MoveNext() int size = _collection.Count; if (_index >= size) + { return false; + } _index++; @@ -231,7 +210,9 @@ public Capture Current get { if (_index < 0 || _index >= _collection.Count) + { throw new InvalidOperationException(SR.EnumNotStarted); + } return _collection[_index]; } @@ -239,10 +220,7 @@ public Capture Current object IEnumerator.Current => Current; - void IEnumerator.Reset() - { - _index = -1; - } + void IEnumerator.Reset() => _index = -1; void IDisposable.Dispose() { } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CollectionDebuggerProxy.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CollectionDebuggerProxy.cs index e8ef9dc8962f0..39c05a0d5e167 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CollectionDebuggerProxy.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/CollectionDebuggerProxy.cs @@ -11,10 +11,8 @@ internal sealed class CollectionDebuggerProxy { private readonly ICollection _collection; - public CollectionDebuggerProxy(ICollection collection) - { + public CollectionDebuggerProxy(ICollection collection) => _collection = collection ?? 
throw new ArgumentNullException(nameof(collection)); - } [DebuggerBrowsable(DebuggerBrowsableState.RootHidden)] public T[] Items diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Group.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Group.cs index 54d6f23f03828..9245ac3a83f19 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Group.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Group.cs @@ -2,10 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -// Group represents the substring or substrings that -// are captured by a single capturing group after one -// regular expression match. - namespace System.Text.RegularExpressions { /// @@ -22,17 +18,14 @@ public class Group : Capture internal CaptureCollection? _capcoll; internal Group(string text, int[] caps, int capcount, string name) - : base(text, capcount == 0 ? 0 : caps[(capcount - 1) * 2], - capcount == 0 ? 0 : caps[(capcount * 2) - 1]) + : base(text, capcount == 0 ? 0 : caps[(capcount - 1) * 2], capcount == 0 ? 0 : caps[(capcount * 2) - 1]) { _caps = caps; _capcount = capcount; Name = name; } - /// - /// Indicates whether the match is successful. - /// + /// Indicates whether the match is successful. public bool Success => _capcount != 0; public string Name { get; } @@ -45,13 +38,14 @@ internal Group(string text, int[] caps, int capcount, string name) public CaptureCollection Captures => _capcoll ??= new CaptureCollection(this); /// - /// Returns a Group object equivalent to the one supplied that is safe to share between - /// multiple threads. + /// Returns a Group object equivalent to the one supplied that is safe to share between multiple threads. 
/// public static Group Synchronized(Group inner) { if (inner == null) - throw new ArgumentNullException(nameof(inner)); + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.inner); + } // force Captures to be computed. CaptureCollection capcoll = inner.Captures; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/GroupCollection.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/GroupCollection.cs index a3b4920c077a1..71f797ca3acbd 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/GroupCollection.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/GroupCollection.cs @@ -2,9 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -// The GroupCollection lists the captured Capture numbers -// contained in a compiled Regex. - using System.Collections; using System.Collections.Generic; using System.Diagnostics; @@ -23,7 +20,7 @@ public class GroupCollection : IList, IReadOnlyList, IList, IReadO private readonly Match _match; private readonly Hashtable? _captureMap; - // cache of Group objects fed to the user + /// Cache of Group objects fed to the user. private Group[]? _groups; internal GroupCollection(Match match, Hashtable? caps) @@ -32,22 +29,20 @@ internal GroupCollection(Match match, Hashtable? caps) _captureMap = caps; } + internal void Reset() => _groups = null; + public bool IsReadOnly => true; - /// - /// Returns the number of groups. - /// + /// Returns the number of groups. public int Count => _match._matchcount.Length; public Group this[int groupnum] => GetGroup(groupnum); - public Group this[string groupname] => _match._regex == null ? + public Group this[string groupname] => _match._regex is null ? 
Group.s_emptyGroup : GetGroup(_match._regex.GroupNumberFromName(groupname)); - /// - /// Provides an enumerator in the same order as Item[]. - /// + /// Provides an enumerator in the same order as Item[]. public IEnumerator GetEnumerator() => new Enumerator(this); IEnumerator IEnumerable.GetEnumerator() => new Enumerator(this); @@ -61,7 +56,7 @@ private Group GetGroup(int groupnum) return GetGroupImpl(groupNumImpl); } } - else if (groupnum < _match._matchcount.Length && groupnum >= 0) + else if ((uint)groupnum < _match._matchcount.Length) { return GetGroupImpl(groupnum); } @@ -75,11 +70,12 @@ private Group GetGroup(int groupnum) private Group GetGroupImpl(int groupnum) { if (groupnum == 0) + { return _match; + } // Construct all the Group objects the first time GetGroup is called - - if (_groups == null) + if (_groups is null) { _groups = new Group[_match._matchcount.Length - 1]; for (int i = 0; i < _groups.Length; i++) @@ -98,8 +94,10 @@ private Group GetGroupImpl(int groupnum) public void CopyTo(Array array, int arrayIndex) { - if (array == null) - throw new ArgumentNullException(nameof(array)); + if (array is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.array); + } for (int i = arrayIndex, j = 0; j < Count; i++, j++) { @@ -109,12 +107,18 @@ public void CopyTo(Array array, int arrayIndex) public void CopyTo(Group[] array, int arrayIndex) { - if (array == null) - throw new ArgumentNullException(nameof(array)); + if (array is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.array); + } if (arrayIndex < 0 || arrayIndex > array.Length) + { throw new ArgumentOutOfRangeException(nameof(arrayIndex)); + } if (array.Length - arrayIndex < Count) + { throw new ArgumentException(SR.Arg_ArrayPlusOffTooSmall); + } for (int i = arrayIndex, j = 0; j < Count; i++, j++) { @@ -124,92 +128,72 @@ public void CopyTo(Group[] array, int arrayIndex) int IList.IndexOf(Group item) { - var comparer = EqualityComparer.Default; for (int i = 0; i < 
Count; i++) { - if (comparer.Equals(this[i], item)) + if (EqualityComparer.Default.Equals(this[i], item)) + { return i; + } } + return -1; } - void IList.Insert(int index, Group item) - { + void IList.Insert(int index, Group item) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - void IList.RemoveAt(int index) - { + void IList.RemoveAt(int index) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } Group IList.this[int index] { - get { return this[index]; } - set { throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); } + get => this[index]; + set => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); } - void ICollection.Add(Group item) - { + void ICollection.Add(Group item) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - void ICollection.Clear() - { + void ICollection.Clear() => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } bool ICollection.Contains(Group item) => ((IList)this).IndexOf(item) >= 0; - bool ICollection.Remove(Group item) - { + bool ICollection.Remove(Group item) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - int IList.Add(object? value) - { + int IList.Add(object? value) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - void IList.Clear() - { + void IList.Clear() => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } bool IList.Contains(object? value) => - value is Group && ((ICollection)this).Contains((Group)value); + value is Group other && ((ICollection)this).Contains(other); int IList.IndexOf(object? value) => - value is Group ? ((IList)this).IndexOf((Group)value) : -1; + value is Group other ? ((IList)this).IndexOf(other) : -1; - void IList.Insert(int index, object? value) - { + void IList.Insert(int index, object? 
value) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } bool IList.IsFixedSize => true; - void IList.Remove(object? value) - { + void IList.Remove(object? value) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - void IList.RemoveAt(int index) - { + void IList.RemoveAt(int index) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } object? IList.this[int index] { - get { return this[index]; } - set { throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); } + get => this[index]; + set => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); } - IEnumerator> IEnumerable>.GetEnumerator() - { - return new Enumerator(this); - } + IEnumerator> IEnumerable>.GetEnumerator() => + new Enumerator(this); #pragma warning disable CS8614 // Nullability of reference types in type of parameter doesn't match implicitly implemented member. public bool TryGetValue(string key, [NotNullWhen(true)] out Group? value) @@ -226,10 +210,7 @@ public bool TryGetValue(string key, [NotNullWhen(true)] out Group? 
value) return true; } - public bool ContainsKey(string key) - { - return _match._regex!.GroupNumberFromName(key) >= 0; - } + public bool ContainsKey(string key) => _match._regex!.GroupNumberFromName(key) >= 0; public IEnumerable Keys { @@ -271,10 +252,11 @@ public bool MoveNext() int size = _collection.Count; if (_index >= size) + { return false; + } _index++; - return _index < size; } @@ -283,7 +265,9 @@ public Group Current get { if (_index < 0 || _index >= _collection.Count) + { throw new InvalidOperationException(SR.EnumNotStarted); + } return _collection[_index]; } @@ -293,11 +277,12 @@ KeyValuePair IEnumerator>.Current { get { - if (_index < 0 || _index >= _collection.Count) + if ((uint)_index >= _collection.Count) + { throw new InvalidOperationException(SR.EnumNotStarted); + } Group value = _collection[_index]; - return new KeyValuePair(value.Name, value); } @@ -305,10 +290,7 @@ KeyValuePair IEnumerator>.Current object IEnumerator.Current => Current; - void IEnumerator.Reset() - { - _index = -1; - } + void IEnumerator.Reset() => _index = -1; void IDisposable.Dispose() { } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs index a16fd0ec90a17..01c8ee2a081a1 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs @@ -2,41 +2,40 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -// Match is the result class for a regex search. -// It returns the location, length, and substring for -// the entire match as well as every captured group. - -// Match is also used during the search to keep track of each capture for each group. This is -// done using the "_matches" array. 
_matches[x] represents an array of the captures for group x. -// This array consists of start and length pairs, and may have empty entries at the end. _matchcount[x] -// stores how many captures a group has. Note that _matchcount[x]*2 is the length of all the valid -// values in _matches. _matchcount[x]*2-2 is the Start of the last capture, and _matchcount[x]*2-1 is the -// Length of the last capture -// -// For example, if group 2 has one capture starting at position 4 with length 6, -// _matchcount[2] == 1 -// _matches[2][0] == 4 -// _matches[2][1] == 6 -// -// Values in the _matches array can also be negative. This happens when using the balanced match -// construct, "(?...)". When the "end" group matches, a capture is added for both the "start" -// and "end" groups. The capture added for "start" receives the negative values, and these values point to -// the next capture to be balanced. They do NOT point to the capture that "end" just balanced out. The negative -// values are indices into the _matches array transformed by the formula -3-x. This formula also untransforms. -// - using System.Collections; +using System.Diagnostics; using System.Diagnostics.CodeAnalysis; -using System.Globalization; namespace System.Text.RegularExpressions { /// /// Represents the results from a single regular expression match. /// + /// + /// Match is the result class for a regex search. + /// It returns the location, length, and substring for + /// the entire match as well as every captured group. + /// + /// Match is also used during the search to keep track of each capture for each group. This is + /// done using the "_matches" array. _matches[x] represents an array of the captures for group x. + /// This array consists of start and length pairs, and may have empty entries at the end. _matchcount[x] + /// stores how many captures a group has. Note that _matchcount[x]*2 is the length of all the valid + /// values in _matches. 
_matchcount[x]*2-2 is the Start of the last capture, and _matchcount[x]*2-1 is the + /// Length of the last capture + /// + /// For example, if group 2 has one capture starting at position 4 with length 6, + /// _matchcount[2] == 1 + /// _matches[2][0] == 4 + /// _matches[2][1] == 6 + /// + /// Values in the _matches array can also be negative. This happens when using the balanced match + /// construct, "(?<start-end>...)". When the "end" group matches, a capture is added for both the "start" + /// and "end" groups. The capture added for "start" receives the negative values, and these values point to + /// the next capture to be balanced. They do NOT point to the capture that "end" just balanced out. The negative + /// values are indices into the _matches array transformed by the formula -3-x. This formula also untransforms. + /// public class Match : Group { - private const int ReplaceBufferSize = 256; internal GroupCollection? _groupcoll; // input to the match @@ -52,8 +51,8 @@ public class Match : Group internal bool _balancing; // whether we've done any balancing with this match. If we // have done balancing, we'll need to do extra work in Tidy(). - internal Match(Regex? regex, int capcount, string text, int begpos, int len, int startpos) - : base(text, new int[2], 0, "0") + internal Match(Regex? regex, int capcount, string text, int begpos, int len, int startpos) : + base(text, new int[2], 0, "0") { _regex = regex; _matchcount = new int[capcount]; @@ -64,14 +63,11 @@ internal Match(Regex? regex, int capcount, string text, int begpos, int len, int _textstart = startpos; _balancing = false; - // No need for an exception here. 
This is only called internally, so we'll use an Assert instead - System.Diagnostics.Debug.Assert(!(_textbeg < 0 || _textstart < _textbeg || _textend < _textstart || Text.Length < _textend), - "The parameters are out of range."); + Debug.Assert(!(_textbeg < 0 || _textstart < _textbeg || _textend < _textstart || Text.Length < _textend), + "The parameters are out of range."); } - /// - /// Returns an empty Match object. - /// + /// Returns an empty Match object. public static Match Empty { get; } = new Match(null, 1, string.Empty, 0, 0, 0); internal void Reset(Regex regex, string text, int textbeg, int textend, int textstart) @@ -89,6 +85,7 @@ internal void Reset(Regex regex, string text, int textbeg, int textend, int text } _balancing = false; + _groupcoll?.Reset(); } public virtual GroupCollection Groups => _groupcoll ??= new GroupCollection(this, null); @@ -100,10 +97,10 @@ internal void Reset(Regex regex, string text, int textbeg, int textend, int text /// public Match NextMatch() { - if (_regex == null) - return this; - - return _regex.Run(false, Length, Text, _textbeg, _textend - _textbeg, _textpos)!; + Regex? r = _regex; + return r != null ? + r.Run(false, Length, Text, _textbeg, _textend - _textbeg, _textpos)! : + this; } /// @@ -113,34 +110,38 @@ public Match NextMatch() /// public virtual string Result(string replacement) { - if (replacement == null) - throw new ArgumentNullException(nameof(replacement)); + if (replacement is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.replacement); + } - if (_regex == null) + Regex? regex = _regex; + if (regex is null) + { throw new NotSupportedException(SR.NoResultOnFailed); + } // Gets the weakly cached replacement helper or creates one if there isn't one already. 
- RegexReplacement repl = RegexReplacement.GetOrCreate(_regex._replref!, replacement, _regex.caps!, _regex.capsize, _regex.capnames!, _regex.roptions); - var vsb = new ValueStringBuilder(stackalloc char[ReplaceBufferSize]); - repl.ReplacementImpl(ref vsb, this); - return vsb.ToString(); + RegexReplacement repl = RegexReplacement.GetOrCreate(regex._replref!, replacement, regex.caps!, regex.capsize, regex.capnames!, regex.roptions); + var segments = new SegmentStringBuilder(256); + repl.ReplacementImpl(ref segments, this); + return segments.ToString(); } - internal ReadOnlySpan GroupToStringImpl(int groupnum) + internal ReadOnlyMemory GroupToStringImpl(int groupnum) { int c = _matchcount[groupnum]; if (c == 0) - return string.Empty; + { + return default; + } int[] matches = _matches[groupnum]; - - return Text.AsSpan(matches[(c - 1) * 2], matches[(c * 2) - 1]); + return Text.AsMemory(matches[(c - 1) * 2], matches[(c * 2) - 1]); } - internal ReadOnlySpan LastGroupToStringImpl() - { - return GroupToStringImpl(_matchcount.Length - 1); - } + internal ReadOnlyMemory LastGroupToStringImpl() => + GroupToStringImpl(_matchcount.Length - 1); /// /// Returns a Match instance equivalent to the one supplied that is safe to share @@ -148,27 +149,25 @@ internal ReadOnlySpan LastGroupToStringImpl() /// public static Match Synchronized(Match inner) { - if (inner == null) - throw new ArgumentNullException(nameof(inner)); + if (inner is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.inner); + } int numgroups = inner._matchcount.Length; // Populate all groups by looking at each one for (int i = 0; i < numgroups; i++) { - Group group = inner.Groups[i]; - // Depends on the fact that Group.Synchronized just // operates on and returns the same instance - Group.Synchronized(group); + Synchronized(inner.Groups[i]); } return inner; } - /// - /// Adds a capture to the group specified by "cap" - /// + /// Adds a capture to the group specified by "cap" internal void 
AddMatch(int cap, int start, int len) { _matches[cap] ??= new int[2]; @@ -182,7 +181,10 @@ internal void AddMatch(int cap, int start, int len) int[] oldmatches = matches[cap]; int[] newmatches = new int[capcount * 8]; for (int j = 0; j < capcount * 2; j++) + { newmatches[j] = oldmatches[j]; + } + matches[cap] = newmatches; } @@ -191,13 +193,12 @@ internal void AddMatch(int cap, int start, int len) matchcount[cap] = capcount + 1; } - /* - * Nonpublic builder: Add a capture to balance the specified group. This is used by the - balanced match construct. (?...) - - If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(cap). - However, since we have backtracking, we need to keep track of everything. - */ + /// + /// Nonpublic builder: Add a capture to balance the specified group. This is used by the + /// balanced match construct. (?<foo-foo2>...) + /// If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(cap). + /// However, since we have backtracking, we need to keep track of everything. + /// internal void BalanceMatch(int cap) { _balancing = true; @@ -210,33 +211,35 @@ internal void BalanceMatch(int cap) // capture group for balancing. If it is, we'll reset target to point to that capture. int[][] matches = _matches; if (matches[cap][target] < 0) + { target = -3 - matches[cap][target]; + } // move back to the previous capture target -= 2; // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it. 
if (target >= 0 && matches[cap][target] < 0) + { AddMatch(cap, matches[cap][target], matches[cap][target + 1]); + } else + { AddMatch(cap, -3 - target, -4 - target /* == -3 - (target + 1) */ ); + } } - /// - /// Removes a group match by capnum - /// - internal void RemoveMatch(int cap) - { - _matchcount[cap]--; - } + /// Removes a group match by capnum + internal void RemoveMatch(int cap) => _matchcount[cap]--; - /// - /// Tells if a group was matched by capnum - /// + /// Tells if a group was matched by capnum internal bool IsMatched(int cap) { int[] matchcount = _matchcount; - return (uint)cap < (uint)matchcount.Length && matchcount[cap] > 0 && _matches[cap][matchcount[cap] * 2 - 1] != (-3 + 1); + return + (uint)cap < (uint)matchcount.Length && + matchcount[cap] > 0 && + _matches[cap][matchcount[cap] * 2 - 1] != (-3 + 1); } /// @@ -247,10 +250,7 @@ internal int MatchIndex(int cap) int[][] matches = _matches; int i = matches[cap][_matchcount[cap] * 2 - 2]; - if (i >= 0) - return i; - - return matches[cap][-3 - i]; + return i >= 0 ? i : matches[cap][-3 - i]; } /// @@ -261,96 +261,99 @@ internal int MatchLength(int cap) int[][] matches = _matches; int i = matches[cap][_matchcount[cap] * 2 - 1]; - if (i >= 0) - return i; - - return matches[cap][-3 - i]; + return i >= 0 ? i : matches[cap][-3 - i]; } - /// - /// Tidy the match so that it can be used as an immutable result - /// + /// Tidy the match so that it can be used as an immutable result internal void Tidy(int textpos) { - int[][] matches = _matches; - - int[] interval = matches[0]; + _textpos = textpos; + _capcount = _matchcount[0]; + int[] interval = _matches[0]; Index = interval[0]; Length = interval[1]; - _textpos = textpos; + if (_balancing) + { + TidyBalancing(); + } + } + private void TidyBalancing() + { int[] matchcount = _matchcount; - _capcount = matchcount[0]; + int[][] matches = _matches; - if (_balancing) + // The idea here is that we want to compact all of our unbalanced captures. 
To do that we + // use j basically as a count of how many unbalanced captures we have at any given time + // (really j is an index, but j/2 is the count). First we skip past all of the real captures + // until we find a balance captures. Then we check each subsequent entry. If it's a balance + // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy + // it down to the last free position. + for (int cap = 0; cap < matchcount.Length; cap++) { - // The idea here is that we want to compact all of our unbalanced captures. To do that we - // use j basically as a count of how many unbalanced captures we have at any given time - // (really j is an index, but j/2 is the count). First we skip past all of the real captures - // until we find a balance captures. Then we check each subsequent entry. If it's a balance - // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy - // it down to the last free position. - for (int cap = 0; cap < matchcount.Length; cap++) - { - int limit; - int[] matcharray; + int limit; + int[] matcharray; - limit = matchcount[cap] * 2; - matcharray = matches[cap]; + limit = matchcount[cap] * 2; + matcharray = matches[cap]; - int i = 0; - int j; + int i; + int j; - for (i = 0; i < limit; i++) + for (i = 0; i < limit; i++) + { + if (matcharray[i] < 0) { - if (matcharray[i] < 0) - break; + break; } + } - for (j = i; i < limit; i++) + for (j = i; i < limit; i++) + { + if (matcharray[i] < 0) { - if (matcharray[i] < 0) - { - // skip negative values - j--; - } - else + // skip negative values + j--; + } + else + { + // but if we find something positive (an actual capture), copy it back to the last + // unbalanced position. + if (i != j) { - // but if we find something positive (an actual capture), copy it back to the last - // unbalanced position. 
- if (i != j) - matcharray[j] = matcharray[i]; - j++; + matcharray[j] = matcharray[i]; } - } - matchcount[cap] = j / 2; + j++; + } } - _balancing = false; + matchcount[cap] = j / 2; } + + _balancing = false; } #if DEBUG [ExcludeFromCodeCoverage] - internal bool Debug => _regex != null && _regex.Debug; + internal bool IsDebug => _regex != null && _regex.IsDebug; internal virtual void Dump() { - int i, j; - - for (i = 0; i < _matchcount.Length; i++) + for (int i = 0; i < _matchcount.Length; i++) { - System.Diagnostics.Debug.WriteLine("Capnum " + i.ToString(CultureInfo.InvariantCulture) + ":"); + Debug.WriteLine($"Capnum {i}:"); - for (j = 0; j < _matchcount[i]; j++) + for (int j = 0; j < _matchcount[i]; j++) { string text = ""; if (_matches[i][j * 2] >= 0) + { text = Text.Substring(_matches[i][j * 2], _matches[i][j * 2 + 1]); + } - System.Diagnostics.Debug.WriteLine(" (" + _matches[i][j * 2].ToString(CultureInfo.InvariantCulture) + "," + _matches[i][j * 2 + 1].ToString(CultureInfo.InvariantCulture) + ") " + text); + Debug.WriteLine($" ({_matches[i][j * 2]},{_matches[i][j * 2 + 1]}) {text}"); } } } @@ -360,13 +363,13 @@ internal virtual void Dump() /// /// MatchSparse is for handling the case where slots are sparsely arranged (e.g., if somebody says use slot 100000) /// - internal class MatchSparse : Match + internal sealed class MatchSparse : Match { // the lookup hashtable internal new readonly Hashtable _caps; - internal MatchSparse(Regex regex, Hashtable caps, int capcount, string text, int begpos, int len, int startpos) - : base(regex, capcount, text, begpos, len, startpos) + internal MatchSparse(Regex regex, Hashtable caps, int capcount, string text, int begpos, int len, int startpos) : + base(regex, capcount, text, begpos, len, startpos) { _caps = caps; } @@ -382,7 +385,7 @@ internal override void Dump() foreach (object? 
entry in _caps) { DictionaryEntry kvp = (DictionaryEntry)entry!; - System.Diagnostics.Debug.WriteLine("Slot " + kvp.Key.ToString() + " -> " + kvp.Value!.ToString()); + Debug.WriteLine($"Slot {kvp.Key} -> {kvp.Value}"); } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/MatchCollection.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/MatchCollection.cs index 76017e82b13d1..ca0005da659ab 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/MatchCollection.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/MatchCollection.cs @@ -2,20 +2,12 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -// The MatchCollection lists the successful matches that -// result when searching a string for a regular expression. - using System.Collections; using System.Collections.Generic; using System.Diagnostics; namespace System.Text.RegularExpressions { - /* - * This collection returns a sequence of successful match results, either - * from GetMatchCollection() or GetExecuteCollection(). It stops when the - * first failure is encountered (it does not return the failed match). - */ /// /// Represents the set of names appearing as capturing group /// names in a regular expression. 
@@ -26,22 +18,20 @@ public class MatchCollection : IList, IReadOnlyList, IList { private readonly Regex _regex; private readonly List _matches; - private bool _done; private readonly string _input; - private readonly int _beginning; - private readonly int _length; private int _startat; private int _prevlen; + private bool _done; - internal MatchCollection(Regex regex, string input, int beginning, int length, int startat) + internal MatchCollection(Regex regex, string input, int startat) { - if (startat < 0 || startat > input.Length) - throw new ArgumentOutOfRangeException(nameof(startat), SR.BeginIndexNotNegative); + if ((uint)startat > (uint)input.Length) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startat, ExceptionResource.BeginIndexNotNegative); + } _regex = regex; _input = input; - _beginning = beginning; - _length = length; _startat = startat; _prevlen = -1; _matches = new List(); @@ -69,21 +59,16 @@ public virtual Match this[int i] { get { - if (i < 0) - throw new ArgumentOutOfRangeException(nameof(i)); - - Match? match = GetMatch(i); - - if (match == null) - throw new ArgumentOutOfRangeException(nameof(i)); - + Match? match = null; + if (i < 0 || (match = GetMatch(i)) is null) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.i); + } return match; } } - /// - /// Provides an enumerator in the same order as Item[i]. - /// + /// Provides an enumerator in the same order as Item[i]. 
public IEnumerator GetEnumerator() => new Enumerator(this); IEnumerator IEnumerable.GetEnumerator() => new Enumerator(this); @@ -93,17 +78,19 @@ public virtual Match this[int i] Debug.Assert(i >= 0, "i cannot be negative."); if (_matches.Count > i) + { return _matches[i]; + } if (_done) + { return null; + } Match match; - do { - match = _regex.Run(false, _prevlen, _input, _beginning, _length, _startat)!; - + match = _regex.Run(false, _prevlen, _input, 0, _input.Length, _startat)!; if (!match.Success) { _done = true; @@ -111,7 +98,6 @@ public virtual Match this[int i] } _matches.Add(match); - _prevlen = match.Length; _startat = match._textpos; } while (_matches.Count <= i); @@ -149,31 +135,23 @@ int IList.IndexOf(Match item) return _matches.IndexOf(item); } - void IList.Insert(int index, Match item) - { + void IList.Insert(int index, Match item) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - void IList.RemoveAt(int index) - { + void IList.RemoveAt(int index) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } Match IList.this[int index] { - get { return this[index]; } - set { throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); } + get => this[index]; + set => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); } - void ICollection.Add(Match item) - { + void ICollection.Add(Match item) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - void ICollection.Clear() - { + void ICollection.Clear() => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } bool ICollection.Contains(Match item) { @@ -181,48 +159,36 @@ bool ICollection.Contains(Match item) return _matches.Contains(item); } - bool ICollection.Remove(Match item) - { + bool ICollection.Remove(Match item) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - int IList.Add(object? value) - { + int IList.Add(object? 
value) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - void IList.Clear() - { + void IList.Clear() => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } bool IList.Contains(object? value) => value is Match && ((ICollection)this).Contains((Match)value); int IList.IndexOf(object? value) => - value is Match ? ((IList)this).IndexOf((Match)value) : -1; + value is Match other ? ((IList)this).IndexOf(other) : -1; - void IList.Insert(int index, object? value) - { + void IList.Insert(int index, object? value) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } bool IList.IsFixedSize => true; - void IList.Remove(object? value) - { + void IList.Remove(object? value) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } - void IList.RemoveAt(int index) - { + void IList.RemoveAt(int index) => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); - } object? IList.this[int index] { - get { return this[index]; } - set { throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); } + get => this[index]; + set => throw new NotSupportedException(SR.NotSupported_ReadOnlyCollection); } private sealed class Enumerator : IEnumerator @@ -241,12 +207,14 @@ internal Enumerator(MatchCollection collection) public bool MoveNext() { if (_index == -2) + { return false; + } _index++; Match? 
match = _collection.GetMatch(_index); - if (match == null) + if (match is null) { _index = -2; return false; @@ -260,7 +228,9 @@ public Match Current get { if (_index < 0) + { throw new InvalidOperationException(SR.EnumNotStarted); + } return _collection.GetMatch(_index)!; } @@ -268,10 +238,7 @@ public Match Current object IEnumerator.Current => Current; - void IEnumerator.Reset() - { - _index = -1; - } + void IEnumerator.Reset() => _index = -1; void IDisposable.Dispose() { } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Cache.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Cache.cs index 0049aff187c2f..b780608db3e5d 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Cache.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Cache.cs @@ -20,7 +20,7 @@ public static int CacheSize { if (value < 0) { - throw new ArgumentOutOfRangeException(nameof(value)); + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.value); } RegexCache.MaxCacheSize = value; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs index bba51ade96e87..8e13953ae276e 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Match.cs @@ -23,35 +23,32 @@ public static bool IsMatch(string input, string pattern, RegexOptions options) = public static bool IsMatch(string input, string pattern, RegexOptions options, TimeSpan matchTimeout) => RegexCache.GetOrAdd(pattern, options, matchTimeout).IsMatch(input); - /* - * Returns true if the regex finds a match within the specified string - */ /// /// Searches the input string 
for one or more matches using the previous pattern, /// options, and starting position. /// public bool IsMatch(string input) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } - return IsMatch(input, UseOptionR() ? input.Length : 0); + return Run(quick: true, -1, input, 0, input.Length, UseOptionR() ? input.Length : 0) is null; } - /* - * Returns true if the regex finds a match after the specified position - * (proceeding leftward if the regex is leftward and rightward otherwise) - */ /// /// Searches the input string for one or more matches using the previous pattern and options, /// with a new starting position. /// public bool IsMatch(string input, int startat) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } - return (null == Run(true, -1, input, 0, input.Length, startat)); + return Run(quick: true, -1, input, 0, input.Length, startat) is null; } /// @@ -72,51 +69,45 @@ public static Match Match(string input, string pattern, RegexOptions options) => public static Match Match(string input, string pattern, RegexOptions options, TimeSpan matchTimeout) => RegexCache.GetOrAdd(pattern, options, matchTimeout).Match(input); - /* - * Finds the first match for the regular expression starting at the beginning - * of the string (or at the end of the string if the regex is leftward) - */ /// /// Matches a regular expression with a string and returns - /// the precise result as a RegexMatch object. + /// the precise result as a Match object. /// public Match Match(string input) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } - return Match(input, UseOptionR() ? 
input.Length : 0); + return Run(quick: false, -1, input, 0, input.Length, UseOptionR() ? input.Length : 0)!; } - /* - * Finds the first match, starting at the specified position - */ /// /// Matches a regular expression with a string and returns - /// the precise result as a RegexMatch object. + /// the precise result as a Match object. /// public Match Match(string input, int startat) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } - return Run(false, -1, input, 0, input.Length, startat)!; + return Run(quick: false, -1, input, 0, input.Length, startat)!; } - /* - * Finds the first match, restricting the search to the specified interval of - * the char array. - */ /// - /// Matches a regular expression with a string and returns the precise result as a - /// RegexMatch object. + /// Matches a regular expression with a string and returns the precise result as a Match object. /// public Match Match(string input, int beginning, int length) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } - return Run(false, -1, input, beginning, length, UseOptionR() ? beginning + length : beginning)!; + return Run(quick: false, -1, input, beginning, length, UseOptionR() ? 
beginning + length : beginning)!; } /// @@ -134,33 +125,30 @@ public static MatchCollection Matches(string input, string pattern, RegexOptions public static MatchCollection Matches(string input, string pattern, RegexOptions options, TimeSpan matchTimeout) => RegexCache.GetOrAdd(pattern, options, matchTimeout).Matches(input); - /* - * Finds the first match for the regular expression starting at the beginning - * of the string Enumerator(or at the end of the string if the regex is leftward) - */ /// /// Returns all the successful matches as if Match was called iteratively numerous times. /// public MatchCollection Matches(string input) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } - return Matches(input, UseOptionR() ? input.Length : 0); + return new MatchCollection(this, input, UseOptionR() ? input.Length : 0); } - /* - * Finds the first match, starting at the specified position - */ /// /// Returns all the successful matches as if Match was called iteratively numerous times. 
/// public MatchCollection Matches(string input, int startat) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } - return new MatchCollection(this, input, 0, input.Length, startat); + return new MatchCollection(this, input, startat); } } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Replace.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Replace.cs index 5a8c16c249edf..2ddc07ad86279 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Replace.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Replace.cs @@ -2,19 +2,15 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System.Collections.Generic; -using System.IO; -using System.Text; - namespace System.Text.RegularExpressions { // Callback class public delegate string MatchEvaluator(Match match); + internal delegate bool MatchCallback(ref TState state, Match match); + public partial class Regex { - private const int ReplaceBufferSize = 256; - /// /// Replaces all occurrences of the pattern with the pattern, starting at /// the first character in the input string. @@ -40,8 +36,10 @@ public static string Replace(string input, string pattern, string replacement, R /// public string Replace(string input, string replacement) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } return Replace(input, replacement, -1, UseOptionR() ? 
input.Length : 0); } @@ -53,8 +51,10 @@ public string Replace(string input, string replacement) /// public string Replace(string input, string replacement, int count) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } return Replace(input, replacement, count, UseOptionR() ? input.Length : 0); } @@ -66,16 +66,20 @@ public string Replace(string input, string replacement, int count) /// public string Replace(string input, string replacement, int count, int startat) { - if (input == null) - throw new ArgumentNullException(nameof(input)); - - if (replacement == null) - throw new ArgumentNullException(nameof(replacement)); - - // Gets the weakly cached replacement helper or creates one if there isn't one already. - RegexReplacement repl = RegexReplacement.GetOrCreate(_replref!, replacement, caps!, capsize, capnames!, roptions); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } + if (replacement is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.replacement); + } - return repl.Replace(this, input, count, startat); + // Gets the weakly cached replacement helper or creates one if there isn't one already, + // then uses it to perform the replace. + return + RegexReplacement.GetOrCreate(_replref!, replacement, caps!, capsize, capnames!, roptions). + Replace(this, input, count, startat); } /// @@ -101,10 +105,12 @@ public static string Replace(string input, string pattern, MatchEvaluator evalua /// public string Replace(string input, MatchEvaluator evaluator) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } - return Replace(input, evaluator, -1, UseOptionR() ? input.Length : 0); + return Replace(evaluator, this, input, -1, UseOptionR() ? 
input.Length : 0); } /// @@ -113,10 +119,12 @@ public string Replace(string input, MatchEvaluator evaluator) /// public string Replace(string input, MatchEvaluator evaluator, int count) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } - return Replace(input, evaluator, count, UseOptionR() ? input.Length : 0); + return Replace(evaluator, this, input, count, UseOptionR() ? input.Length : 0); } /// @@ -126,8 +134,10 @@ public string Replace(string input, MatchEvaluator evaluator, int count) /// public string Replace(string input, MatchEvaluator evaluator, int count, int startat) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } return Replace(evaluator, this, input, count, startat); } @@ -143,79 +153,65 @@ public string Replace(string input, MatchEvaluator evaluator, int count, int sta /// private static string Replace(MatchEvaluator evaluator, Regex regex, string input, int count, int startat) { - if (evaluator == null) - throw new ArgumentNullException(nameof(evaluator)); + if (evaluator is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.evaluator); + } if (count < -1) - throw new ArgumentOutOfRangeException(nameof(count), SR.CountTooSmall); - if (startat < 0 || startat > input.Length) - throw new ArgumentOutOfRangeException(nameof(startat), SR.BeginIndexNotNegative); + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.CountTooSmall); + } + if ((uint)startat > (uint)input.Length) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startat, ExceptionResource.BeginIndexNotNegative); + } if (count == 0) + { return input; + } - Match match = regex.Match(input, startat); + var state = (segments: new SegmentStringBuilder(256), evaluator, prevat: 0, input, 
count); - if (!match.Success) + if (!regex.RightToLeft) { - return input; + regex.Run(input, startat, ref state, (ref (SegmentStringBuilder segments, MatchEvaluator evaluator, int prevat, string input, int count) state, Match match) => + { + state.segments.Add(state.input.AsMemory(state.prevat, match.Index - state.prevat)); + state.prevat = match.Index + match.Length; + state.segments.Add(state.evaluator(match).AsMemory()); + return --state.count != 0; + }); + + if (state.segments.Count == 0) + { + return input; + } + + state.segments.Add(input.AsMemory(state.prevat, input.Length - state.prevat)); } else { - var vsb = new ValueStringBuilder(stackalloc char[ReplaceBufferSize]); + state.prevat = input.Length; - if (!regex.RightToLeft) + regex.Run(input, startat, ref state, (ref (SegmentStringBuilder segments, MatchEvaluator evaluator, int prevat, string input, int count) state, Match match) => { - int prevat = 0; - - do - { - if (match.Index != prevat) - vsb.Append(input.AsSpan(prevat, match.Index - prevat)); - - prevat = match.Index + match.Length; - string result = evaluator(match); - if (!string.IsNullOrEmpty(result)) - vsb.Append(result); + state.segments.Add(state.input.AsMemory(match.Index + match.Length, state.prevat - match.Index - match.Length)); + state.prevat = match.Index; + state.segments.Add(evaluator(match).AsMemory()); + return --state.count != 0; + }); - if (--count == 0) - break; - - match = match.NextMatch(); - } while (match.Success); - - if (prevat < input.Length) - vsb.Append(input.AsSpan(prevat, input.Length - prevat)); - } - else + if (state.segments.Count == 0) { - // In right to left mode append all the inputs in reversed order to avoid an extra dynamic data structure - // and to be able to work with Spans. A final reverse of the transformed reversed input string generates - // the desired output. Similar to Tower of Hanoi. 
- - int prevat = input.Length; - - do - { - if (match.Index + match.Length != prevat) - vsb.AppendReversed(input.AsSpan(match.Index + match.Length, prevat - match.Index - match.Length)); - - prevat = match.Index; - vsb.AppendReversed(evaluator(match)); - - if (--count == 0) - break; - - match = match.NextMatch(); - } while (match.Success); - - if (prevat > 0) - vsb.AppendReversed(input.AsSpan(0, prevat)); - - vsb.Reverse(); + return input; } - return vsb.ToString(); + state.segments.Add(input.AsMemory(0, state.prevat)); + state.segments.AsSpan().Reverse(); } + + return state.segments.ToString(); } } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Split.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Split.cs index d0bb0e991e66b..ee5a2a43a8cdd 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Split.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Split.cs @@ -30,10 +30,12 @@ public static string[] Split(string input, string pattern, RegexOptions options, /// public string[] Split(string input) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } - return Split(input, 0, UseOptionR() ? input.Length : 0); + return Split(this, input, 0, UseOptionR() ? input.Length : 0); } /// @@ -42,8 +44,10 @@ public string[] Split(string input) /// public string[] Split(string input, int count) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } return Split(this, input, count, UseOptionR() ? 
input.Length : 0); } @@ -53,8 +57,10 @@ public string[] Split(string input, int count) /// public string[] Split(string input, int count, int startat) { - if (input == null) - throw new ArgumentNullException(nameof(input)); + if (input is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.input); + } return Split(this, input, count, startat); } @@ -66,94 +72,79 @@ public string[] Split(string input, int count, int startat) private static string[] Split(Regex regex, string input, int count, int startat) { if (count < 0) - throw new ArgumentOutOfRangeException(nameof(count), SR.CountTooSmall); - if (startat < 0 || startat > input.Length) - throw new ArgumentOutOfRangeException(nameof(startat), SR.BeginIndexNotNegative); - - string[] result; + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.CountTooSmall); + } + if ((uint)startat > (uint)input.Length) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startat, ExceptionResource.BeginIndexNotNegative); + } if (count == 1) { - result = new string[1]; - result[0] = input; - return result; + return new[] { input }; } - count -= 1; + count--; + var state = (results: new List(), prevat: 0, input, count); - Match match = regex.Match(input, startat); - - if (!match.Success) - { - result = new string[1]; - result[0] = input; - return result; - } - else + if (!regex.RightToLeft) { - List al = new List(); - - if (!regex.RightToLeft) + regex.Run(input, startat, ref state, (ref (List results, int prevat, string input, int count) state, Match match) => { - int prevat = 0; + state.results.Add(state.input.Substring(state.prevat, match.Index - state.prevat)); + state.prevat = match.Index + match.Length; - while (true) + // add all matched capture groups to the list. 
+ for (int i = 1; i < match.Groups.Count; i++) { - al.Add(input.Substring(prevat, match.Index - prevat)); - - prevat = match.Index + match.Length; - - // add all matched capture groups to the list. - for (int i = 1; i < match.Groups.Count; i++) + if (match.IsMatched(i)) { - if (match.IsMatched(i)) - al.Add(match.Groups[i].ToString()); + state.results.Add(match.Groups[i].ToString()); } + } - if (--count == 0) - break; + return --state.count != 0; + }); - match = match.NextMatch(); + if (state.results.Count == 0) + { + return new[] { input }; + } - if (!match.Success) - break; - } + state.results.Add(input.Substring(state.prevat, input.Length - state.prevat)); + } + else + { + state.prevat = input.Length; - al.Add(input.Substring(prevat, input.Length - prevat)); - } - else + regex.Run(input, startat, ref state, (ref (List results, int prevat, string input, int count) state, Match match) => { - int prevat = input.Length; + state.results.Add(state.input.Substring(match.Index + match.Length, state.prevat - match.Index - match.Length)); + state.prevat = match.Index; - while (true) + // add all matched capture groups to the list. + for (int i = 1; i < match.Groups.Count; i++) { - al.Add(input.Substring(match.Index + match.Length, prevat - match.Index - match.Length)); - - prevat = match.Index; - - // add all matched capture groups to the list. 
- for (int i = 1; i < match.Groups.Count; i++) + if (match.IsMatched(i)) { - if (match.IsMatched(i)) - al.Add(match.Groups[i].ToString()); + state.results.Add(match.Groups[i].ToString()); } - - if (--count == 0) - break; - - match = match.NextMatch(); - - if (!match.Success) - break; } - al.Add(input.Substring(0, prevat)); + return --state.count != 0; + }); - al.Reverse(0, al.Count); + if (state.results.Count == 0) + { + return new[] { input }; } - return al.ToArray(); + state.results.Add(input.Substring(0, state.prevat)); + state.results.Reverse(0, state.results.Count); } + + return state.results.ToArray(); } } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Timeout.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Timeout.cs index f82231b72b9ba..8d60f3302c216 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Timeout.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Timeout.cs @@ -10,11 +10,14 @@ public partial class Regex { // We need this because time is queried using Environment.TickCount for performance reasons // (Environment.TickCount returns milliseconds as an int and cycles): - private static readonly TimeSpan s_maximumMatchTimeout = TimeSpan.FromMilliseconds(int.MaxValue - 1); + private const ulong MaximumMatchTimeoutTicks = 10_000UL * (int.MaxValue - 1); // TimeSpan.FromMilliseconds(int.MaxValue - 1).Ticks; // During static initialisation of Regex we check private const string DefaultMatchTimeout_ConfigKeyName = "REGEX_DEFAULT_MATCH_TIMEOUT"; + // Number of ticks represented by InfiniteMatchTimeout + private const long InfiniteMatchTimeoutTicks = -10_000; // InfiniteMatchTimeout.Ticks + // InfiniteMatchTimeout specifies that match timeout is switched OFF. It allows for faster code paths // compared to simply having a very large timeout. 
// We do not want to ask users to use System.Threading.Timeout.InfiniteTimeSpan as a parameter because: @@ -52,7 +55,7 @@ private static TimeSpan InitDefaultMatchTimeout() object? defaultMatchTimeoutObj = ad.GetData(DefaultMatchTimeout_ConfigKeyName); // If no default is specified, use fallback - if (defaultMatchTimeoutObj == null) + if (defaultMatchTimeoutObj is null) { return InfiniteMatchTimeout; } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index c25170723ec1c..05116c6861b7f 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System.Collections; +using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.Reflection; @@ -102,14 +103,18 @@ private void Init(string pattern, RegexOptions options, TimeSpan matchTimeout, C internalMatchTimeout = matchTimeout; #if DEBUG - if (Debug) + if (IsDebug) { - System.Diagnostics.Debug.Write($"Pattern: {pattern}"); + Debug.Write($"Pattern: {pattern}"); RegexOptions displayOptions = options & ~RegexOptions.Debug; if (displayOptions != RegexOptions.None) - System.Diagnostics.Debug.Write($"Options: {displayOptions}"); - if (matchTimeout != Regex.InfiniteMatchTimeout) - System.Diagnostics.Debug.Write($"Timeout: {matchTimeout}"); + { + Debug.Write($"Options: {displayOptions}"); + } + if (matchTimeout != InfiniteMatchTimeout) + { + Debug.Write($"Timeout: {matchTimeout}"); + } } #endif @@ -130,28 +135,21 @@ internal static void ValidatePattern(string pattern) { if (pattern is null) { - throw new ArgumentNullException(nameof(pattern)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.pattern); } } 
internal static void ValidateOptions(RegexOptions options) { - if (options < RegexOptions.None || (((int)options) >> MaxOptionShift) != 0) - { - throw new ArgumentOutOfRangeException(nameof(options)); - } - - if ((options & RegexOptions.ECMAScript) != 0 && - (options & ~(RegexOptions.ECMAScript | - RegexOptions.IgnoreCase | - RegexOptions.Multiline | - RegexOptions.Compiled | + if (((((uint)options) >> MaxOptionShift) != 0) || + ((options & RegexOptions.ECMAScript) != 0 && + (options & ~(RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Compiled | #if DEBUG RegexOptions.Debug | #endif - RegexOptions.CultureInvariant)) != 0) + RegexOptions.CultureInvariant)) != 0)) { - throw new ArgumentOutOfRangeException(nameof(options)); + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.options); } } @@ -160,18 +158,15 @@ internal static void ValidateOptions(RegexOptions options) /// The valid range is TimeSpan.Zero < matchTimeout <= Regex.MaximumMatchTimeout. /// /// The timeout value to validate. - /// If the specified timeout is not within a valid range. - /// + /// If the specified timeout is not within a valid range. protected internal static void ValidateMatchTimeout(TimeSpan matchTimeout) { - if (InfiniteMatchTimeout == matchTimeout) - return; - - // make sure timeout is not longer then Environment.Ticks cycle length: - if (TimeSpan.Zero < matchTimeout && matchTimeout <= s_maximumMatchTimeout) - return; - - throw new ArgumentOutOfRangeException(nameof(matchTimeout)); + // make sure timeout is positive but not longer then Environment.Ticks cycle length + long matchTimeoutTicks = matchTimeout.Ticks; + if (matchTimeoutTicks != InfiniteMatchTimeoutTicks && ((ulong)(matchTimeoutTicks - 1) >= MaximumMatchTimeoutTicks)) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.matchTimeout); + } } protected Regex(SerializationInfo info, StreamingContext context) => @@ -186,8 +181,10 @@ protected IDictionary? 
Caps get => caps; set { - if (value == null) - throw new ArgumentNullException(nameof(value)); + if (value is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); + } caps = value as Hashtable ?? new Hashtable(value); } @@ -199,8 +196,10 @@ protected IDictionary? CapNames get => capnames; set { - if (value == null) - throw new ArgumentNullException(nameof(value)); + if (value is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); + } capnames = value as Hashtable ?? new Hashtable(value); } @@ -227,12 +226,12 @@ public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, Assembly { if (assemblyname is null) { - throw new ArgumentNullException(nameof(assemblyname)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.assemblyname); } if (regexinfos is null) { - throw new ArgumentNullException(nameof(regexinfos)); + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.regexinfos); } #if DEBUG // until it can be fully implemented @@ -253,8 +252,10 @@ public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, Assembly /// public static string Escape(string str) { - if (str == null) - throw new ArgumentNullException(nameof(str)); + if (str is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.str); + } return RegexParser.Escape(str); } @@ -264,8 +265,10 @@ public static string Escape(string str) /// public static string Unescape(string str) { - if (str == null) - throw new ArgumentNullException(nameof(str)); + if (str is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.str); + } return RegexParser.Unescape(str); } @@ -285,12 +288,6 @@ public static string Unescape(string str) /// public override string ToString() => pattern!; - /* - * Returns an array of the group names that are used to capture groups - * in the regular expression. Only needed if the regex is not known until - * runtime, and one wants to extract captured groups. 
(Probably unusual, - * but supplied for completeness.) - */ /// /// Returns the GroupNameCollection for the regular expression. This collection contains the /// set of strings used to name capturing groups in the expression. @@ -299,12 +296,12 @@ public string[] GetGroupNames() { string[] result; - if (capslist == null) + if (capslist is null) { result = new string[capsize]; for (int i = 0; i < result.Length; i++) { - result[i] = i.ToString(); + result[i] = ((uint)i).ToString(); } } else @@ -315,12 +312,6 @@ public string[] GetGroupNames() return result; } - /* - * Returns an array of the group numbers that are used to capture groups - * in the regular expression. Only needed if the regex is not known until - * runtime, and one wants to extract captured groups. (Probably unusual, - * but supplied for completeness.) - */ /// /// Returns the integer group number corresponding to a group name. /// @@ -328,11 +319,9 @@ public int[] GetGroupNumbers() { int[] result; - if (caps == null) + if (caps is null) { - int max = capsize; - result = new int[max]; - + result = new int[capsize]; for (int i = 0; i < result.Length; i++) { result[i] = i; @@ -340,9 +329,8 @@ public int[] GetGroupNumbers() } else { - result = new int[caps.Count]; - // Manual use of IDictionaryEnumerator instead of foreach to avoid DictionaryEntry box allocations. + result = new int[caps.Count]; IDictionaryEnumerator de = caps.GetEnumerator(); while (de.MoveNext()) { @@ -353,134 +341,123 @@ public int[] GetGroupNumbers() return result; } - /* - * Given a group number, maps it to a group name. Note that numbered - * groups automatically get a group name that is the decimal string - * equivalent of its number. - * - * Returns null if the number is not a recognized group number. - */ /// /// Retrieves a group name that corresponds to a group number. 
/// public string GroupNameFromNumber(int i) { - if (capslist == null) + if (capslist is null) { - if (i >= 0 && i < capsize) - return i.ToString(); - - return string.Empty; + return (uint)i < (uint)capsize ? + ((uint)i).ToString() : + string.Empty; } else { - if (caps != null) - { - if (!caps.TryGetValue(i, out i)) - return string.Empty; - } - - if (i >= 0 && i < capslist.Length) - return capslist[i]; - - return string.Empty; + return caps != null && !caps.TryGetValue(i, out i) ? string.Empty : + (uint)i < (uint)capslist.Length ? capslist[i] : + string.Empty; } } - /* - * Given a group name, maps it to a group number. Note that numbered - * groups automatically get a group name that is the decimal string - * equivalent of its number. - * - * Returns -1 if the name is not a recognized group name. - */ /// - /// Returns a group number that corresponds to a group name. + /// Returns a group number that corresponds to a group name, or -1 if the name is not a recognized group name. /// public int GroupNumberFromName(string name) { - if (name == null) - throw new ArgumentNullException(nameof(name)); - - int result; + if (name is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.name); + } - // look up name if we have a hashtable of names if (capnames != null) { - return capnames.TryGetValue(name, out result) ? result : -1; + // Look up name if we have a hashtable of names. + return capnames.TryGetValue(name, out int result) ? result : -1; } - - // convert to an int if it looks like a number - result = 0; - for (int i = 0; i < name.Length; i++) + else { - uint digit = (uint)(name[i] - '0'); - if (digit > 9) - { - return -1; - } - - result = (result * 10) + (int)digit; + // Otherwise, try to parse it as a number. + return uint.TryParse(name, NumberStyles.None, provider: null, out uint result) && result < capsize ? (int)result : -1; } - - // return int if it's in range - return result >= 0 && result < capsize ? 
result : -1; } protected void InitializeReferences() { if (_refsInitialized) - throw new NotSupportedException(SR.OnlyAllowedOnce); + { + ThrowHelper.ThrowNotSupportedException(ExceptionResource.OnlyAllowedOnce); + } - _refsInitialized = true; _replref = new WeakReference(null); + _refsInitialized = true; } - /// - /// Internal worker called by all the public APIs - /// - /// + /// Internal worker called by the public APIs internal Match? Run(bool quick, int prevlen, string input, int beginning, int length, int startat) { - if (startat < 0 || startat > input.Length) - throw new ArgumentOutOfRangeException(nameof(startat), SR.BeginIndexNotNegative); - - if (length < 0 || length > input.Length) - throw new ArgumentOutOfRangeException(nameof(length), SR.LengthNotNegative); + if ((uint)startat > (uint)input.Length) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startat, ExceptionResource.BeginIndexNotNegative); + } + if ((uint)length > (uint)input.Length) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.length, ExceptionResource.LengthNotNegative); + } - RegexRunner runner = - Interlocked.Exchange(ref _runner, null) ?? // use a cached runner if there is one - (factory != null ? factory.CreateInstance() : // use the compiled RegexRunner factory if there is one - new RegexInterpreter(_code!, UseOptionInvariant() ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture)); + RegexRunner runner = RentRunner(); try { // Do the scan starting at the requested position Match? 
match = runner.Scan(this, input, beginning, beginning + length, startat, prevlen, quick, internalMatchTimeout); #if DEBUG - if (Debug) match?.Dump(); + if (IsDebug) match?.Dump(); #endif return match; } finally { - // Release the runner back to the cache - _runner = runner; + ReturnRunner(runner); } } + internal void Run(string input, int startat, ref TState state, MatchCallback callback) + { + Debug.Assert((uint)startat <= (uint)input.Length); + RegexRunner runner = RentRunner(); + try + { + runner.Scan(this, input, startat, ref state, callback, internalMatchTimeout); + } + finally + { + ReturnRunner(runner); + } + } + + /// Gets a runner from the cache, or creates a new one. + [MethodImpl(MethodImplOptions.AggressiveInlining)] // factored out to be used by only two call sites + private RegexRunner RentRunner() => + Interlocked.Exchange(ref _runner, null) ?? // use a cached runner if there is one + (factory != null ? factory.CreateInstance() : // use the compiled RegexRunner factory if there is one + new RegexInterpreter(_code!, UseOptionInvariant() ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture)); + + /// Release the runner back to the cache. + internal void ReturnRunner(RegexRunner runner) => _runner = runner; + + /// True if the option was set. protected bool UseOptionC() => (roptions & RegexOptions.Compiled) != 0; - /// True if the L option was set + /// True if the option was set. protected internal bool UseOptionR() => (roptions & RegexOptions.RightToLeft) != 0; + /// True if the option was set. internal bool UseOptionInvariant() => (roptions & RegexOptions.CultureInvariant) != 0; #if DEBUG - /// - /// True if the regex has debugging enabled - /// + /// True if the regex has debugging enabled. 
[ExcludeFromCodeCoverage] - internal bool Debug => (roptions & RegexOptions.Debug) != 0; + internal bool IsDebug => (roptions & RegexOptions.Debug) != 0; #endif } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs index 2980488a29036..9897fae933326 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs @@ -108,8 +108,6 @@ public RegexBoyerMoore(string pattern, bool caseInsensitive, bool rightToLeft, C if (Positive[match] == 0) Positive[match] = match - scan; - // System.Diagnostics.Debug.WriteLine("Set positive[" + match + "] to " + (match - scan)); - break; } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 5a7c16f39a260..76288af6a890b 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -408,15 +408,12 @@ static RegexCharClass() // Make sure the initial capacity for s_definedCategories is correct Debug.Assert( s_definedCategories.Count == DefinedCategoriesCapacity, - "RegexCharClass s_definedCategories's initial capacity (DefinedCategoriesCapacity) is incorrect.", - "Expected (s_definedCategories.Count): {0}, Actual (DefinedCategoriesCapacity): {1}", - s_definedCategories.Count, - DefinedCategoriesCapacity); + $"Expected (s_definedCategories.Count): {s_definedCategories.Count}, Actual (DefinedCategoriesCapacity): {DefinedCategoriesCapacity}"); // Make sure the s_propTable is correctly ordered int len = 
s_propTable.Length; for (int i = 0; i < len - 1; i++) - Debug.Assert(string.Compare(s_propTable[i][0], s_propTable[i + 1][0], StringComparison.Ordinal) < 0, "RegexCharClass s_propTable is out of order at (" + s_propTable[i][0] + ", " + s_propTable[i + 1][0] + ")"); + Debug.Assert(string.Compare(s_propTable[i][0], s_propTable[i + 1][0], StringComparison.Ordinal) < 0, $"RegexCharClass s_propTable is out of order at ({s_propTable[i][0]}, {s_propTable[i + 1][0]})"); } #endif diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index a95a9d170e1d0..e3710de9d0211 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -2838,7 +2838,7 @@ void EmitAtomicSingleCharZeroOrOne(RegexNode node) Call(s_spanGetLengthMethod); BgeUnFar(skipUpdatesLabel); - // if (textSpan[i] != ch) goto skipUpdatesLabel; + // if (textSpan[textSpanPos] != ch) goto skipUpdatesLabel; Ldloca(textSpanLocal); Ldc(textSpanPos); Call(s_spanGetItemMethod); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index b8ebca81d7d47..df31aef4597b8 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -146,7 +146,7 @@ private void Backtrack() { int newpos = runtrack![runtrackpos++]; #if DEBUG - if (runmatch!.Debug) + if (runmatch!.IsDebug) { if (newpos < 0) Debug.WriteLine(" Backtracking (back2) to code position " + (-newpos)); @@ -621,7 +621,7 @@ protected override void Go() 
advance = -1; } #if DEBUG - if (runmatch!.Debug) + if (runmatch!.IsDebug) { DumpState(); } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs index 88013dd6335b5..a75138419a7f1 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexReplacement.cs @@ -2,15 +2,16 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -// The RegexReplacement class represents a substitution string for -// use when using regexes to search/replace, etc. It's logically -// a sequence intermixed (1) constant strings and (2) group numbers. - using System.Collections; using System.Collections.Generic; namespace System.Text.RegularExpressions { + /// + /// The RegexReplacement class represents a substitution string for + /// use when using regexes to search/replace, etc. It's logically + /// a sequence intermixed (1) constant strings and (2) group numbers. 
+ /// internal sealed class RegexReplacement { // Constants for special insertion patterns @@ -21,7 +22,7 @@ internal sealed class RegexReplacement public const int WholeString = -4; private readonly List _strings; // table of string constants - private readonly List _rules; // negative -> group #, positive -> string # + private readonly int[] _rules; // negative -> group #, positive -> string # /// /// Since RegexReplacement shares the same parser as Regex, @@ -31,14 +32,17 @@ internal sealed class RegexReplacement public RegexReplacement(string rep, RegexNode concat, Hashtable _caps) { if (concat.Type != RegexNode.Concatenate) - throw new ArgumentException(SR.ReplacementError); + { + throw ThrowHelper.CreateArgumentException(ExceptionResource.ReplacementError); + } Span vsbStack = stackalloc char[256]; var vsb = new ValueStringBuilder(vsbStack); var strings = new List(); - var rules = new List(); + var rules = new ValueListBuilder(stackalloc int[64]); - for (int i = 0; i < concat.ChildCount(); i++) + int childCount = concat.ChildCount(); + for (int i = 0; i < childCount; i++) { RegexNode child = concat.Child(i); @@ -55,32 +59,36 @@ public RegexReplacement(string rep, RegexNode concat, Hashtable _caps) case RegexNode.Ref: if (vsb.Length > 0) { - rules.Add(strings.Count); + rules.Append(strings.Count); strings.Add(vsb.ToString()); vsb = new ValueStringBuilder(vsbStack); } int slot = child.M; if (_caps != null && slot >= 0) + { slot = (int)_caps[slot]!; + } - rules.Add(-Specials - 1 - slot); + rules.Append(-Specials - 1 - slot); break; default: - throw new ArgumentException(SR.ReplacementError); + throw ThrowHelper.CreateArgumentException(ExceptionResource.ReplacementError); } } if (vsb.Length > 0) { - rules.Add(strings.Count); + rules.Append(strings.Count); strings.Add(vsb.ToString()); } Pattern = rep; _strings = strings; - _rules = rules; + _rules = rules.AsSpan().ToArray(); + + rules.Dispose(); } /// @@ -101,39 +109,43 @@ public static RegexReplacement 
GetOrCreate(WeakReference replR return repl; } - /// - /// The original pattern string - /// + /// The original pattern string public string Pattern { get; } /// /// Given a Match, emits into the StringBuilder the evaluated /// substitution pattern. /// - public void ReplacementImpl(ref ValueStringBuilder vsb, Match match) + public void ReplacementImpl(ref SegmentStringBuilder segments, Match match) { - for (int i = 0; i < _rules.Count; i++) + foreach (int r in _rules) { - int r = _rules[i]; - if (r >= 0) // string lookup - vsb.Append(_strings[r]); - else if (r < -Specials) // group lookup - vsb.Append(match.GroupToStringImpl(-Specials - 1 - r)); + if (r >= 0) + { + // string lookup + segments.Add(_strings[r].AsMemory()); + } + else if (r < -Specials) + { + // group lookup + segments.Add(match.GroupToStringImpl(-Specials - 1 - r)); + } else { + // special insertion patterns switch (-Specials - 1 - r) - { // special insertion patterns + { case LeftPortion: - vsb.Append(match.GetLeftSubstring()); + segments.Add(match.GetLeftSubstring()); break; case RightPortion: - vsb.Append(match.GetRightSubstring()); + segments.Add(match.GetRightSubstring()); break; case LastGroup: - vsb.Append(match.LastGroupToStringImpl()); + segments.Add(match.LastGroupToStringImpl()); break; case WholeString: - vsb.Append(match.Text); + segments.Add(match.Text.AsMemory()); break; } } @@ -141,42 +153,46 @@ public void ReplacementImpl(ref ValueStringBuilder vsb, Match match) } /// - /// Given a Match, emits into the ValueStringBuilder the evaluated + /// Given a Match, emits into the builder the evaluated /// Right-to-Left substitution pattern. 
/// - public void ReplacementImplRTL(ref ValueStringBuilder vsb, Match match) + public void ReplacementImplRTL(ref SegmentStringBuilder segments, Match match) { - for (int i = _rules.Count - 1; i >= 0; i--) + for (int i = _rules.Length - 1; i >= 0; i--) { int r = _rules[i]; - if (r >= 0) // string lookup - vsb.AppendReversed(_strings[r]); - else if (r < -Specials) // group lookup - vsb.AppendReversed(match.GroupToStringImpl(-Specials - 1 - r)); + if (r >= 0) + { + // string lookup + segments.Add(_strings[r].AsMemory()); + } + else if (r < -Specials) + { + // group lookup + segments.Add(match.GroupToStringImpl(-Specials - 1 - r)); + } else { + // special insertion patterns switch (-Specials - 1 - r) - { // special insertion patterns + { case LeftPortion: - vsb.AppendReversed(match.GetLeftSubstring()); + segments.Add(match.GetLeftSubstring()); break; case RightPortion: - vsb.AppendReversed(match.GetRightSubstring()); + segments.Add(match.GetRightSubstring()); break; case LastGroup: - vsb.AppendReversed(match.LastGroupToStringImpl()); + segments.Add(match.LastGroupToStringImpl()); break; case WholeString: - vsb.AppendReversed(match.Text); + segments.Add(match.Text.AsMemory()); break; } } } } - // Three very similar algorithms appear below: replace (pattern), - // replace (evaluator), and split. - /// /// Replaces all occurrences of the regex in the string with the /// replacement pattern. 
@@ -189,71 +205,60 @@ public void ReplacementImplRTL(ref ValueStringBuilder vsb, Match match) public string Replace(Regex regex, string input, int count, int startat) { if (count < -1) - throw new ArgumentOutOfRangeException(nameof(count), SR.CountTooSmall); - if (startat < 0 || startat > input.Length) - throw new ArgumentOutOfRangeException(nameof(startat), SR.BeginIndexNotNegative); + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.CountTooSmall); + } + if ((uint)startat > (uint)input.Length) + { + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startat, ExceptionResource.BeginIndexNotNegative); + } if (count == 0) - return input; - - Match match = regex.Match(input, startat); - if (!match.Success) { return input; } - else - { - var vsb = new ValueStringBuilder(stackalloc char[256]); - - if (!regex.RightToLeft) - { - int prevat = 0; - - do - { - if (match.Index != prevat) - vsb.Append(input.AsSpan(prevat, match.Index - prevat)); - - prevat = match.Index + match.Length; - ReplacementImpl(ref vsb, match); - if (--count == 0) - break; - match = match.NextMatch(); - } while (match.Success); + var state = (replacement: this, segments: new SegmentStringBuilder(256), inputMemory: input.AsMemory(), prevat: 0, count); - if (prevat < input.Length) - vsb.Append(input.AsSpan(prevat, input.Length - prevat)); - } - else + if (!regex.RightToLeft) + { + regex.Run(input, startat, ref state, (ref (RegexReplacement thisRef, SegmentStringBuilder segments, ReadOnlyMemory inputMemory, int prevat, int count) state, Match match) => { - // In right to left mode append all the inputs in reversed order to avoid an extra dynamic data structure - // and to be able to work with Spans. A final reverse of the transformed reversed input string generates - // the desired output. Similar to Tower of Hanoi. 
+ state.segments.Add(state.inputMemory.Slice(state.prevat, match.Index - state.prevat)); + state.prevat = match.Index + match.Length; + state.thisRef.ReplacementImpl(ref state.segments, match); + return --state.count != 0; + }); - int prevat = input.Length; - - do - { - if (match.Index + match.Length != prevat) - vsb.AppendReversed(input.AsSpan(match.Index + match.Length, prevat - match.Index - match.Length)); - - prevat = match.Index; - ReplacementImplRTL(ref vsb, match); - if (--count == 0) - break; + if (state.segments.Count == 0) + { + return input; + } - match = match.NextMatch(); - } while (match.Success); + state.segments.Add(state.inputMemory.Slice(state.prevat, input.Length - state.prevat)); + } + else + { + state.prevat = input.Length; - if (prevat > 0) - vsb.AppendReversed(input.AsSpan(0, prevat)); + regex.Run(input, startat, ref state, (ref (RegexReplacement thisRef, SegmentStringBuilder segments, ReadOnlyMemory inputMemory, int prevat, int count) state, Match match) => + { + state.segments.Add(state.inputMemory.Slice(match.Index + match.Length, state.prevat - match.Index - match.Length)); + state.prevat = match.Index; + state.thisRef.ReplacementImplRTL(ref state.segments, match); + return --state.count != 0; + }); - vsb.Reverse(); + if (state.segments.Count == 0) + { + return input; } - return vsb.ToString(); + state.segments.Add(state.inputMemory.Slice(0, state.prevat)); + state.segments.AsSpan().Reverse(); } + + return state.segments.ToString(); } } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs index 6f2bb75f97b69..acaf1e2125844 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs @@ -14,6 +14,7 @@ // methods to push new subpattern match 
results into (or remove // backtracked results from) the Match instance. +using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Globalization; @@ -63,7 +64,6 @@ public abstract class RegexRunner private bool _ignoreTimeout; private int _timeoutOccursAt; - // We have determined this value in a series of experiments where x86 retail // builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values // of TimeoutCheckFrequency did not tend to increase performance; smaller values @@ -85,118 +85,251 @@ protected internal RegexRunner() { } /// and we could use a separate method Skip() that will quickly scan past /// any characters that we know can't match. /// - protected internal Match? Scan(Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick) - { - return Scan(regex, text, textbeg, textend, textstart, prevlen, quick, regex.MatchTimeout); - } + protected internal Match? Scan(Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick) => + Scan(regex, text, textbeg, textend, textstart, prevlen, quick, regex.MatchTimeout); protected internal Match? Scan(Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick, TimeSpan timeout) { - int bump; - int stoppos; - bool initted = false; - - // We need to re-validate timeout here because Scan is historically protected and - // thus there is a possibility it is called from user code: - Regex.ValidateMatchTimeout(timeout); - - _ignoreTimeout = (Regex.InfiniteMatchTimeout == timeout); - _timeout = _ignoreTimeout - ? 
(int)Regex.InfiniteMatchTimeout.TotalMilliseconds - : (int)(timeout.TotalMilliseconds + 0.5); // Round - + // Store arguments into fields for derived runner to examine runregex = regex; runtext = text; runtextbeg = textbeg; runtextend = textend; - runtextstart = textstart; + runtextpos = runtextstart = textstart; - bump = runregex.RightToLeft ? -1 : 1; - stoppos = runregex.RightToLeft ? runtextbeg : runtextend; - - runtextpos = textstart; + // Handle timeout argument + _timeout = -1; // (int)Regex.InfiniteMatchTimeout.TotalMilliseconds + bool ignoreTimeout = _ignoreTimeout = Regex.InfiniteMatchTimeout == timeout; + if (!ignoreTimeout) + { + // We are using Environment.TickCount and not Stopwatch for performance reasons. + // Environment.TickCount is an int that cycles. We intentionally let timeoutOccursAt + // overflow it will still stay ahead of Environment.TickCount for comparisons made + // in DoCheckTimeout(). + Regex.ValidateMatchTimeout(timeout); // validate timeout as this could be called from user code due to being protected + _timeout = (int)(timeout.TotalMilliseconds + 0.5); // Round; + _timeoutOccursAt = Environment.TickCount + _timeout; + _timeoutChecksToSkip = TimeoutCheckFrequency; + } - // If previous match was empty or failed, advance by one before matching + // Configure the additional value to "bump" the position along each time we loop around + // to call FindFirstChar again, as well as the stopping position for the loop. We generally + // bump by 1 and stop at runtextend, but if we're examining right-to-left, we instead bump + // by -1 and stop at runtextbeg. + int bump = 1, stoppos = runtextend; + if (runregex.RightToLeft) + { + bump = -1; + stoppos = runtextbeg; + } + // If previous match was empty or failed, advance by one before matching. if (prevlen == 0) { if (runtextpos == stoppos) + { return Match.Empty; + } runtextpos += bump; } - StartTimeoutWatch(); - + // Main loop: FindFirstChar/Go + bump until the ending position. 
+ bool initialized = false; while (true) { #if DEBUG - if (runregex.Debug) + if (runregex.IsDebug) { Debug.WriteLine(""); - Debug.WriteLine("Search range: from " + runtextbeg.ToString(CultureInfo.InvariantCulture) + " to " + runtextend.ToString(CultureInfo.InvariantCulture)); - Debug.WriteLine("Firstchar search starting at " + runtextpos.ToString(CultureInfo.InvariantCulture) + " stopping at " + stoppos.ToString(CultureInfo.InvariantCulture)); + Debug.WriteLine($"Search range: from {runtextbeg} to {runtextend}"); + Debug.WriteLine($"Firstchar search starting at {runtextpos} stopping at {stoppos}"); } #endif + + // Find the next potential location for a match in the input. if (FindFirstChar()) { - CheckTimeout(); + if (!ignoreTimeout) + { + DoCheckTimeout(); + } - if (!initted) + // Ensure that the runner is initialized. This includes initializing all of the state in the runner + // that Go might use, such as the backtracking stack, as well as a Match object for it to populate. + if (!initialized) { - InitMatch(); - initted = true; + InitializeForGo(); + initialized = true; } + #if DEBUG - if (runregex.Debug) + if (runregex.IsDebug) { - Debug.WriteLine("Executing engine starting at " + runtextpos.ToString(CultureInfo.InvariantCulture)); + Debug.WriteLine($"Executing engine starting at {runtextpos}"); Debug.WriteLine(""); } #endif + + // See if there's a match at this position. Go(); - if (runmatch!._matchcount[0] > 0) + // If we got a match, we're done. + Match match = runmatch!; + if (match._matchcount[0] > 0) { - // We'll return a match even if it touches a previous empty match - return TidyMatch(quick); + if (quick) + { + return null; + } + + // Return the match in its canonical form. + runmatch = null; + match.Tidy(runtextpos); + return match; } - // reset state for another go + // Reset state for another iteration. runtrackpos = runtrack!.Length; runstackpos = runstack!.Length; runcrawlpos = runcrawl!.Length; } - // failure! 
- + // We failed to match at this position. If we're at the stopping point, we're done. if (runtextpos == stoppos) { - TidyMatch(true); return Match.Empty; } - // Recognize leading []* and various anchors, and bump on failure accordingly - - // Bump by one and start again - + // Bump by one (in whichever direction is appropriate) and loop to go again. runtextpos += bump; } - // We never get here } - private void StartTimeoutWatch() + /// Enumerates all of the matches with the specified regex, invoking the callback for each. + /// + /// This repeatedly hands out the same Match instance, updated with new information. + /// + internal void Scan(Regex regex, string text, int textstart, ref TState state, MatchCallback callback, TimeSpan timeout) { - if (_ignoreTimeout) - return; + // Store arguments into fields for derived runner to examine + runregex = regex; + runtext = text; + runtextbeg = 0; + runtextend = text.Length; + runtextpos = runtextstart = textstart; + + // Handle timeout argument + _timeout = -1; // (int)Regex.InfiniteMatchTimeout.TotalMilliseconds + bool ignoreTimeout = _ignoreTimeout = Regex.InfiniteMatchTimeout == timeout; + if (!ignoreTimeout) + { + // We are using Environment.TickCount and not Stopwatch for performance reasons. + // Environment.TickCount is an int that cycles. We intentionally let timeoutOccursAt + // overflow; it will still stay ahead of Environment.TickCount for comparisons made + // in DoCheckTimeout(). + _timeout = (int)(timeout.TotalMilliseconds + 0.5); // Round; + _timeoutOccursAt = Environment.TickCount + _timeout; + _timeoutChecksToSkip = TimeoutCheckFrequency; + } - _timeoutChecksToSkip = TimeoutCheckFrequency; + // Configure the additional value to "bump" the position along each time we loop around + // to call FindFirstChar again, as well as the stopping position for the loop. We generally + // bump by 1 and stop at runtextend, but if we're examining right-to-left, we instead bump + // by -1 and stop at runtextbeg. 
+ int bump = 1, stoppos = runtextend; + if (runregex.RightToLeft) + { + bump = -1; + stoppos = runtextbeg; + } + + // Main loop: FindFirstChar/Go + bump until the ending position. + bool initialized = false; + while (true) + { +#if DEBUG + if (runregex.IsDebug) + { + Debug.WriteLine(""); + Debug.WriteLine($"Search range: from {runtextbeg} to {runtextend}"); + Debug.WriteLine($"Firstchar search starting at {runtextpos} stopping at {stoppos}"); + } +#endif - // We are using Environment.TickCount and not Timewatch for performance reasons. - // Environment.TickCount is an int that cycles. We intentionally let timeoutOccursAt - // overflow it will still stay ahead of Environment.TickCount for comparisons made - // in DoCheckTimeout(): - _timeoutOccursAt = Environment.TickCount + _timeout; + // Find the next potential location for a match in the input. + if (FindFirstChar()) + { + if (!ignoreTimeout) + { + DoCheckTimeout(); + } + + // Ensure that the runner is initialized. This includes initializing all of the state in the runner + // that Go might use, such as the backtracking stack, as well as a Match object for it to populate. + if (!initialized) + { + InitializeForGo(); + initialized = true; + } + +#if DEBUG + if (runregex.IsDebug) + { + Debug.WriteLine($"Executing engine starting at {runtextpos}"); + Debug.WriteLine(""); + } +#endif + + // See if there's a match at this position. + Go(); + + // See if we have a match. + Match match = runmatch!; + if (match._matchcount[0] > 0) + { + // Hand it out to the callback in canonical form. + match.Tidy(runtextpos); + initialized = false; + if (!callback(ref state, match)) + { + // If the callback returns false, we're done. + return; + } + + // Reset state for another iteration. 
+ runtrackpos = runtrack!.Length; + runstackpos = runstack!.Length; + runcrawlpos = runcrawl!.Length; + if (match.Length == 0) + { + if (runtextpos == stoppos) + { + return; + } + + runtextpos += bump; + } + + // Loop around to perform next match from where we left off. + continue; + } + + // Ran Go but it didn't find a match. Reset state for another iteration. + runtrackpos = runtrack!.Length; + runstackpos = runstack!.Length; + runcrawlpos = runcrawl!.Length; + } + + // We failed to match at this position. If we're at the stopping point, we're done. + if (runtextpos == stoppos) + { + return; + } + + // Bump by one (in whichever direction is appropriate) and loop to go again. + runtextpos += bump; + } } protected void CheckTimeout() @@ -226,14 +359,14 @@ private void DoCheckTimeout() return; #if DEBUG - if (runregex!.Debug) + if (runregex!.IsDebug) { Debug.WriteLine(""); Debug.WriteLine("RegEx match timeout occurred!"); - Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString()); - Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency); - Debug.WriteLine("Search pattern: " + runregex.pattern); - Debug.WriteLine("Input: " + runtext); + Debug.WriteLine($"Specified timeout: {TimeSpan.FromMilliseconds(_timeout)}"); + Debug.WriteLine($"Timeout check frequency: {TimeoutCheckFrequency}"); + Debug.WriteLine($"Search pattern: {runregex.pattern}"); + Debug.WriteLine($"Input: {runtext}"); Debug.WriteLine("About to throw RegexMatchTimeoutException."); } #endif @@ -266,27 +399,24 @@ private void DoCheckTimeout() /// /// Initializes all the data members that are used by Go() /// - private void InitMatch() + private void InitializeForGo() { - // Use a hashtabled Match object if the capture numbers are sparse - - if (runmatch == null) + if (runmatch is null) { - if (runregex!.caps != null) - runmatch = new MatchSparse(runregex, runregex.caps, runregex.capsize, runtext!, runtextbeg, runtextend - runtextbeg, runtextstart); - else - 
runmatch = new Match(runregex, runregex.capsize, runtext!, runtextbeg, runtextend - runtextbeg, runtextstart); + // Use a hashtabled Match object if the capture numbers are sparse + runmatch = runregex!.caps is null ? + new Match(runregex, runregex.capsize, runtext!, runtextbeg, runtextend - runtextbeg, runtextstart) : + new MatchSparse(runregex, runregex.caps, runregex.capsize, runtext!, runtextbeg, runtextend - runtextbeg, runtextstart); } else { runmatch.Reset(runregex!, runtext!, runtextbeg, runtextend, runtextstart); } - // note we test runcrawl, because it is the last one to be allocated + // Note we test runcrawl, because it is the last one to be allocated // If there is an alloc failure in the middle of the three allocations, // we may still return to reuse this instance, and we want to behave - // as if the allocations didn't occur. (we used to test _trackcount != 0) - + // as if the allocations didn't occur. if (runcrawl != null) { runtrackpos = runtrack!.Length; @@ -295,15 +425,22 @@ private void InitMatch() return; } + // Everything above runs once per match. + // Everything below runs once per runner. + InitTrackCount(); - int tracksize = runtrackcount * 8; - int stacksize = runtrackcount * 8; + int stacksize; + int tracksize = stacksize = runtrackcount * 8; if (tracksize < 32) + { tracksize = 32; + } if (stacksize < 16) + { stacksize = 16; + } runtrack = new int[tracksize]; runtrackpos = tracksize; @@ -315,29 +452,6 @@ private void InitMatch() runcrawlpos = 32; } - /// - /// Put match in its canonical form before returning it. - /// - private Match? 
TidyMatch(bool quick) - { - if (!quick) - { - Match match = runmatch!; - - runmatch = null; - - match.Tidy(runtextpos); - return match; - } - else - { - // in quick mode, a successful match returns null, and - // the allocated match object is left in the cache - - return null; - } - } - /// /// Called by the implementation of Go() to increase the size of storage /// @@ -551,9 +665,9 @@ protected int MatchLength(int cap) [ExcludeFromCodeCoverage] internal virtual void DumpState() { - Debug.WriteLine("Text: " + TextposDescription()); - Debug.WriteLine("Track: " + StackDescription(runtrack!, runtrackpos)); - Debug.WriteLine("Stack: " + StackDescription(runstack!, runstackpos)); + Debug.WriteLine($"Text: {TextposDescription()}"); + Debug.WriteLine($"Track: {StackDescription(runtrack!, runtrackpos)}"); + Debug.WriteLine($"Stack: {StackDescription(runstack!, runstackpos)}"); } [ExcludeFromCodeCoverage] diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/ThrowHelper.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/ThrowHelper.cs new file mode 100644 index 0000000000000..7c22974cbc9f0 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/ThrowHelper.cs @@ -0,0 +1,97 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Diagnostics.CodeAnalysis; + +namespace System.Text.RegularExpressions +{ + internal static class ThrowHelper + { + [DoesNotReturn] + internal static Exception CreateArgumentException(ExceptionResource resource) => + throw new ArgumentException(GetStringForExceptionResource(resource)); + + [DoesNotReturn] + internal static void ThrowArgumentNullException(ExceptionArgument arg) => + throw new ArgumentNullException(GetStringForExceptionArgument(arg)); + + [DoesNotReturn] + internal static void ThrowArgumentOutOfRangeException(ExceptionArgument arg) => + throw new ArgumentOutOfRangeException(GetStringForExceptionArgument(arg)); + + [DoesNotReturn] + internal static void ThrowArgumentOutOfRangeException(ExceptionArgument arg, ExceptionResource resource) => + throw new ArgumentOutOfRangeException(GetStringForExceptionArgument(arg), GetStringForExceptionResource(resource)); + + [DoesNotReturn] + internal static void ThrowNotSupportedException(ExceptionResource resource) => + throw new NotSupportedException(GetStringForExceptionResource(resource)); + + private static string? 
GetStringForExceptionArgument(ExceptionArgument arg) => + arg switch + { + ExceptionArgument.assemblyname => nameof(ExceptionArgument.assemblyname), + ExceptionArgument.array => nameof(ExceptionArgument.array), + ExceptionArgument.arrayIndex => nameof(ExceptionArgument.arrayIndex), + ExceptionArgument.count => nameof(ExceptionArgument.count), + ExceptionArgument.evaluator => nameof(ExceptionArgument.evaluator), + ExceptionArgument.i => nameof(ExceptionArgument.i), + ExceptionArgument.inner => nameof(ExceptionArgument.inner), + ExceptionArgument.input => nameof(ExceptionArgument.input), + ExceptionArgument.length => nameof(ExceptionArgument.length), + ExceptionArgument.matchTimeout => nameof(ExceptionArgument.matchTimeout), + ExceptionArgument.name => nameof(ExceptionArgument.name), + ExceptionArgument.options => nameof(ExceptionArgument.options), + ExceptionArgument.pattern => nameof(ExceptionArgument.pattern), + ExceptionArgument.regexinfos => nameof(ExceptionArgument.regexinfos), + ExceptionArgument.replacement => nameof(ExceptionArgument.replacement), + ExceptionArgument.startat => nameof(ExceptionArgument.startat), + ExceptionArgument.str => nameof(ExceptionArgument.str), + ExceptionArgument.value => nameof(ExceptionArgument.value), + _ => null + }; + + private static string? 
GetStringForExceptionResource(ExceptionResource resource) => + resource switch + { + ExceptionResource.BeginIndexNotNegative => SR.BeginIndexNotNegative, + ExceptionResource.CountTooSmall => SR.CountTooSmall, + ExceptionResource.LengthNotNegative => SR.LengthNotNegative, + ExceptionResource.OnlyAllowedOnce => SR.OnlyAllowedOnce, + ExceptionResource.ReplacementError => SR.ReplacementError, + _ => null + }; + } + + internal enum ExceptionArgument + { + assemblyname, + array, + arrayIndex, + count, + evaluator, + i, + inner, + input, + length, + matchTimeout, + name, + options, + pattern, + regexinfos, + replacement, + startat, + str, + value, + } + + internal enum ExceptionResource + { + BeginIndexNotNegative, + CountTooSmall, + LengthNotNegative, + OnlyAllowedOnce, + ReplacementError, + } +} diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/SegmentStringBuilder.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/SegmentStringBuilder.cs new file mode 100644 index 0000000000000..71f5839468665 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/SegmentStringBuilder.cs @@ -0,0 +1,94 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Buffers; +using System.Diagnostics; +using System.Runtime.CompilerServices; + +namespace System.Text +{ + /// Provides a value type string builder composed of individual segments represented as instances. + [DebuggerDisplay("Count = {_count}")] + internal struct SegmentStringBuilder + { + /// The array backing the builder, obtained from . + private ReadOnlyMemory[] _array; + /// The number of items in , and thus also the next position in the array to be filled. + private int _count; + + /// Initializes the builder. + /// The initial capacity of the builder. 
+ public SegmentStringBuilder(int capacity) + { + Debug.Assert(capacity > 0); + _array = ArrayPool>.Shared.Rent(capacity); + _count = 0; + } + + /// Gets the number of segments added to the builder. + public int Count => _count; + + /// Adds a segment to the builder. + /// The segment. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Add(ReadOnlyMemory segment) + { + ReadOnlyMemory[] array = _array; + int pos = _count; + if ((uint)pos < (uint)array.Length) + { + array[pos] = segment; + _count = pos + 1; + } + else + { + GrowAndAdd(segment); + } + } + + /// Grows the builder to accommodate another segment. + /// + [MethodImpl(MethodImplOptions.NoInlining)] + private void GrowAndAdd(ReadOnlyMemory segment) + { + ReadOnlyMemory[] array = _array; + Debug.Assert(array.Length == _count); + + ReadOnlyMemory[] newArray = _array = ArrayPool>.Shared.Rent(array.Length * 2); + Array.Copy(array, newArray, _count); + ArrayPool>.Shared.Return(array, clearArray: true); + newArray[_count++] = segment; + } + + /// Gets a span of all segments in the builder. + /// + public Span> AsSpan() => new Span>(_array, 0, _count); + + /// Creates a string from all the segments in the builder and then disposes of the builder. 
+ public override string ToString() + { + int length = 0; + foreach (ReadOnlyMemory segment in AsSpan()) + { + length += segment.Length; + } + + string result = string.Create(length, this, (dest, builder) => + { + foreach (ReadOnlyMemory segment in builder.AsSpan()) + { + segment.Span.CopyTo(dest); + dest = dest.Slice(segment.Length); + } + }); + + ReadOnlyMemory[] array = _array; + AsSpan().Clear(); // clear just what's been filled + this = default; + ArrayPool>.Shared.Return(array); + + return result; + } + } +} diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/ValueStringBuilder.Reverse.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/ValueStringBuilder.Reverse.cs deleted file mode 100644 index 76e2144d6fa31..0000000000000 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/ValueStringBuilder.Reverse.cs +++ /dev/null @@ -1,23 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -namespace System.Text -{ - internal ref partial struct ValueStringBuilder - { - public void AppendReversed(ReadOnlySpan value) - { - Span span = AppendSpan(value.Length); - for (int i = 0; i < span.Length; i++) - { - span[i] = value[value.Length - i - 1]; - } - } - - public void Reverse() - { - _chars.Slice(0, _pos).Reverse(); - } - } -} diff --git a/src/libraries/System.Text.RegularExpressions/tests/PrecompiledRegexScenarioTest.cs b/src/libraries/System.Text.RegularExpressions/tests/PrecompiledRegexScenarioTest.cs index 8b65fee14690d..09a569f0573d1 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/PrecompiledRegexScenarioTest.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/PrecompiledRegexScenarioTest.cs @@ -7,7 +7,9 @@ using System.Text.RegularExpressions; using RegexTestNamespace; using Xunit; -using System.Collections.Generic; + +// NOTE: Be very thoughtful when editing this test file. It's decompiled from an assembly generated +// by CompileToAssembly on .NET Framework, and is used to help validate compatibility with such assemblies. 
namespace System.Text.RegularExpressionsTests { diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs index cef5a825e2bad..a97ec741df4d1 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs @@ -26,9 +26,9 @@ public static IEnumerable Replace_String_TestData() // Stress string pattern = string.Concat(Enumerable.Repeat("([a-z]", 1000).Concat(Enumerable.Repeat(")", 1000))); string input = string.Concat(Enumerable.Repeat("abcde", 200)); - yield return new object[] { pattern, input, "$1000", RegexOptions.None, input.Length, 0, "e" }; yield return new object[] { pattern, input, "$1", RegexOptions.None, input.Length, 0, input }; + yield return new object[] { ".", new string('a', 1000), "b", RegexOptions.None, 1000, 0, new string('b', 1000) }; // Undefined group yield return new object[] { "([a_z])(.+)", "abc", "$3", RegexOptions.None, 3, 0, "$3" };