Skip to content

Commit

Permalink
Optimize URI decode and encode (#1647)
Browse files Browse the repository at this point in the history
* dispatch via custom error signaling to remove costly throws
* minimize allocations
  • Loading branch information
lahma authored Oct 14, 2023
1 parent e5d60cb commit 8ebdc34
Show file tree
Hide file tree
Showing 7 changed files with 172 additions and 103 deletions.
2 changes: 1 addition & 1 deletion Jint.Tests.CommonScripts/SunSpiderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ private static void RunTest(string source)
{
var engine = new Engine()
.SetValue("log", new Action<object>(Console.WriteLine))
.SetValue("assert", new Action<bool, string>((condition, message) => Assert.True(condition, message)));
.SetValue("assert", new Action<bool, string>((condition, message) => Assert.That(condition, message)));

try
{
Expand Down
6 changes: 6 additions & 0 deletions Jint/Engine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public sealed partial class Engine : IDisposable
private readonly ExecutionContextStack _executionContexts;
private JsValue _completionValue = JsValue.Undefined;
internal EvaluationContext? _activeEvaluationContext;
internal ErrorDispatchInfo? _error;

private readonly EventLoop _eventLoop = new();

Expand Down Expand Up @@ -1554,6 +1555,11 @@ private ObjectInstance Construct(
return result;
}

/// <summary>
/// Stores <paramref name="error"/> in the engine's <c>_error</c> field so a failure can be
/// reported without throwing at the point where it is detected.
/// </summary>
/// <param name="error">The error information to remember.</param>
/// <remarks>
/// NOTE(review): the code that reads and resets <c>_error</c> is not part of this view;
/// callers presumably return immediately after signaling. Confirm against the call sites.
/// </remarks>
internal void SignalError(ErrorDispatchInfo error) => _error = error;

public void Dispose()
{
if (_objectWrapperCache is null)
Expand Down
227 changes: 137 additions & 90 deletions Jint/Native/Global/GlobalObject.cs
Original file line number Diff line number Diff line change
Expand Up @@ -272,21 +272,10 @@ public static JsValue IsFinite(JsValue thisObject, JsValue[] arguments)
return true;
}

private static readonly HashSet<char> UriReserved = new HashSet<char>
{
';', '/', '?', ':', '@', '&', '=', '+', '$', ','
};

private static readonly HashSet<char> UriUnescaped = new HashSet<char>
{
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_', '.', '!',
'~', '*', '\'', '(', ')'
};

private static readonly HashSet<char> UnescapedUriSet = new HashSet<char>(UriReserved.Concat(UriUnescaped).Concat(new[] { '#' }));
private static readonly HashSet<char> ReservedUriSet = new HashSet<char>(UriReserved.Concat(new[] { '#' }));
private static readonly string UriReserved = new (new [] { ';', '/', '?', ':', '@', '&', '=', '+', '$', ',' });
private static readonly string UriUnescaped = new(new [] { '-', '_', '.', '!', '~', '*', '\'', '(', ')' });
private static readonly string UnescapedUriSet = UriReserved + UriUnescaped + '#';
private static readonly string ReservedUriSet = UriReserved + '#';

private const string HexaMap = "0123456789ABCDEF";

Expand Down Expand Up @@ -320,25 +309,26 @@ public JsValue EncodeUriComponent(JsValue thisObject, JsValue[] arguments)
return Encode(uriString, UriUnescaped);
}

private string Encode(string uriString, HashSet<char> unescapedUriSet)
private JsValue Encode(string uriString, string unescapedUriSet)
{
var strLen = uriString.Length;

_stringBuilder.EnsureCapacity(uriString.Length);
_stringBuilder.Clear();
var buffer = new byte[4];

for (var k = 0; k < strLen; k++)
{
var c = uriString[k];
if (unescapedUriSet != null && unescapedUriSet.Contains(c))
if (c is >= 'a' and <= 'z' || c is >= 'A' and <= 'Z' || c is >= '0' and <= '9' || unescapedUriSet.IndexOf(c) != -1)
{
_stringBuilder.Append(c);
}
else
{
if (c >= 0xDC00 && c <= 0xDBFF)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

int v;
Expand All @@ -351,70 +341,58 @@ private string Encode(string uriString, HashSet<char> unescapedUriSet)
k++;
if (k == strLen)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

var kChar = (int)uriString[k];
if (kChar < 0xDC00 || kChar > 0xDFFF)
var kChar = (int) uriString[k];
if (kChar is < 0xDC00 or > 0xDFFF)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

v = (c - 0xD800) * 0x400 + (kChar - 0xDC00) + 0x10000;
}

byte[] octets = System.Array.Empty<byte>();

if (v >= 0 && v <= 0x007F)
{
// 00000000 0zzzzzzz -> 0zzzzzzz
octets = new[] { (byte)v };
}
else if (v <= 0x07FF)
{
// 00000yyy yyzzzzzz -> 110yyyyy ; 10zzzzzz
octets = new[]
{
(byte)(0xC0 | (v >> 6)),
(byte)(0x80 | (v & 0x3F))
};
}
else if (v <= 0xD7FF)
{
// xxxxyyyy yyzzzzzz -> 1110xxxx; 10yyyyyy; 10zzzzzz
octets = new[]
{
(byte)(0xE0 | (v >> 12)),
(byte)(0x80 | ((v >> 6) & 0x3F)),
(byte)(0x80 | (v & 0x3F))
};
}
else if (v <= 0xDFFF)
{
ExceptionHelper.ThrowUriError(_realm);
}
else if (v <= 0xFFFF)
var length = 1;
switch (v)
{
octets = new[]
{
(byte) (0xE0 | (v >> 12)),
(byte) (0x80 | ((v >> 6) & 0x3F)),
(byte) (0x80 | (v & 0x3F))
};
}
else
{
octets = new[]
{
(byte) (0xF0 | (v >> 18)),
(byte) (0x80 | (v >> 12 & 0x3F)),
(byte) (0x80 | (v >> 6 & 0x3F)),
(byte) (0x80 | (v >> 0 & 0x3F))
};
case >= 0 and <= 0x007F:
// 00000000 0zzzzzzz -> 0zzzzzzz
buffer[0] = (byte) v;
break;
case <= 0x07FF:
// 00000yyy yyzzzzzz -> 110yyyyy ; 10zzzzzz
length = 2;
buffer[0] = (byte) (0xC0 | (v >> 6));
buffer[1] = (byte) (0x80 | (v & 0x3F));
break;
case <= 0xD7FF:
// xxxxyyyy yyzzzzzz -> 1110xxxx; 10yyyyyy; 10zzzzzz
length = 3;
buffer[0] = (byte) (0xE0 | (v >> 12));
buffer[1] = (byte) (0x80 | ((v >> 6) & 0x3F));
buffer[2] = (byte) (0x80 | (v & 0x3F));
break;
case <= 0xDFFF:
goto uriError;
case <= 0xFFFF:
length = 3;
buffer[0] = (byte) (0xE0 | (v >> 12));
buffer[1] = (byte) (0x80 | ((v >> 6) & 0x3F));
buffer[2] = (byte) (0x80 | (v & 0x3F));
break;
default:
length = 4;
buffer[0] = (byte) (0xF0 | (v >> 18));
buffer[1] = (byte) (0x80 | (v >> 12 & 0x3F));
buffer[2] = (byte) (0x80 | (v >> 6 & 0x3F));
buffer[3] = (byte) (0x80 | (v >> 0 & 0x3F));
break;
}

foreach (var octet in octets)
for (var i = 0; i < length; i++)
{
var octet = buffer[i];
var x1 = HexaMap[octet / 16];
var x2 = HexaMap[octet % 16];
_stringBuilder.Append('%').Append(x1).Append(x2);
Expand All @@ -423,6 +401,10 @@ private string Encode(string uriString, HashSet<char> unescapedUriSet)
}

return _stringBuilder.ToString();

uriError:
_engine.SignalError(ExceptionHelper.CreateUriError(_realm, "URI malformed"));
return null!;
}

public JsValue DecodeUri(JsValue thisObject, JsValue[] arguments)
Expand All @@ -439,14 +421,18 @@ public JsValue DecodeUriComponent(JsValue thisObject, JsValue[] arguments)
return Decode(componentString, null);
}

private string Decode(string uriString, HashSet<char>? reservedSet)
private JsValue Decode(string uriString, string? reservedSet)
{
var strLen = uriString.Length;

_stringBuilder.EnsureCapacity(strLen);
_stringBuilder.Clear();

var octets = System.Array.Empty<byte>();
#if SUPPORTS_SPAN_PARSE
Span<byte> octets = stackalloc byte[4];
#else
var octets = new byte[4];
#endif

for (var k = 0; k < strLen; k++)
{
Expand All @@ -460,21 +446,23 @@ private string Decode(string uriString, HashSet<char>? reservedSet)
var start = k;
if (k + 2 >= strLen)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

if (!IsValidHexaChar(uriString[k + 1]) || !IsValidHexaChar(uriString[k + 2]))
var c1 = uriString[k + 1];
var c2 = uriString[k + 2];
if (!IsValidHexaChar(c1) || !IsValidHexaChar(c2))
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

var B = Convert.ToByte(uriString[k + 1].ToString() + uriString[k + 2], 16);
var B = StringToIntBase16(uriString.AsSpan(k + 1, 2));

k += 2;
if ((B & 0x80) == 0)
{
C = (char)B;
if (reservedSet == null || !reservedSet.Contains(C))
if (reservedSet == null || reservedSet.IndexOf(C) == -1)
{
_stringBuilder.Append(C);
}
Expand All @@ -486,56 +474,115 @@ private string Decode(string uriString, HashSet<char>? reservedSet)
else
{
var n = 0;
for (; ((B << n) & 0x80) != 0; n++) ;
for (; ((B << n) & 0x80) != 0; n++);

if (n == 1 || n > 4)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

octets = octets.Length == n
? octets
: new byte[n];

octets[0] = B;

if (k + (3 * (n - 1)) >= strLen)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

for (var j = 1; j < n; j++)
{
k++;
if (uriString[k] != '%')
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

if (!IsValidHexaChar(uriString[k + 1]) || !IsValidHexaChar(uriString[k + 2]))
c1 = uriString[k + 1];
c2 = uriString[k + 2];
if (!IsValidHexaChar(c1) || !IsValidHexaChar(c2))
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

B = Convert.ToByte(uriString[k + 1].ToString() + uriString[k + 2], 16);
B = StringToIntBase16(uriString.AsSpan(k + 1, 2));

// B & 11000000 != 10000000
if ((B & 0xC0) != 0x80)
{
ExceptionHelper.ThrowUriError(_realm);
goto uriError;
}

k += 2;

octets[j] = B;
}

_stringBuilder.Append(Encoding.UTF8.GetString(octets, 0, octets.Length));
#if SUPPORTS_SPAN_PARSE
_stringBuilder.Append(Encoding.UTF8.GetString(octets.Slice(0, n)));
#else
_stringBuilder.Append(Encoding.UTF8.GetString(octets, 0, n));
#endif
}
}
}

return _stringBuilder.ToString();

uriError:
_engine.SignalError(ExceptionHelper.CreateUriError(_realm, "URI malformed"));
return null!;
}

/// <summary>
/// Parses the leading hexadecimal digits of <paramref name="s"/> into a byte.
/// </summary>
/// <param name="s">
/// Characters to parse. A single leading '+' and a following "0x"/"0X" prefix are skipped
/// when present; parsing stops at the first character that is not a base-16 digit.
/// </param>
/// <returns>
/// The accumulated value narrowed to a byte by the final cast. Returns 0 when no digit is
/// consumed, which now includes an empty span (previously an empty span caused an
/// out-of-range index on the first character).
/// </returns>
private static byte StringToIntBase16(ReadOnlySpan<char> s)
{
    var i = 0;
    var length = s.Length;

    // Bounds-check before the first read so an empty span falls through to "no digits".
    if (i < length && s[i] == '+')
    {
        i++;
    }

    // Optional "0x" / "0X" prefix; needs at least two remaining characters.
    if (i + 1 < length && s[i] == '0')
    {
        if (s[i + 1] == 'x' || s[i + 1] == 'X')
        {
            i += 2;
        }
    }

    uint result = 0;
    while (i < length && IsDigit(s[i], 16, out var value))
    {
        result = result * 16 + (uint) value;
        i++;
    }

    // Callers in view pass exactly two hex digits, so the value fits in a byte.
    return (byte) (int) result;
}

/// <summary>
/// Maps an ASCII alphanumeric character to its digit value and reports whether that value
/// is a valid digit in the given radix.
/// </summary>
/// <param name="c">Character to classify: '0'-'9' map to 0-9, letters of either case map to 10-35.</param>
/// <param name="radix">Numeric base; the digit value must be strictly less than it.</param>
/// <param name="result">
/// The digit value for any ASCII alphanumeric character (set even when the method returns
/// false because the value is not below <paramref name="radix"/>), or -1 for any other character.
/// </param>
/// <returns>True when <paramref name="c"/> is alphanumeric and its value is less than <paramref name="radix"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsDigit(char c, int radix, out int result)
{
    if (c is >= '0' and <= '9')
    {
        result = c - '0';
    }
    else if (c is >= 'A' and <= 'Z')
    {
        result = c - 'A' + 10;
    }
    else if (c is >= 'a' and <= 'z')
    {
        result = c - 'a' + 10;
    }
    else
    {
        // Not an ASCII letter or digit: no digit value exists.
        result = -1;
        return false;
    }

    return result < radix;
}

/// <summary>
Expand Down
Loading

0 comments on commit 8ebdc34

Please sign in to comment.