Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 46 additions & 8 deletions src/Neo.ConsoleService/CommandTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;

Expand All @@ -35,10 +36,51 @@ private static char EscapedChar(char ch)
'e' => '\e',
'0' => '\0',
' ' => ' ',
_ => throw new ArgumentException($"Invalid escaped character: {ch}")
_ => throw new ArgumentException($"Invalid escaped character: \\{ch}. " +
"If you don't want to use escape character, please use backtick(`) to wrap the string.")
};
}

/// <summary>
/// Decodes the escape sequence whose backslash sits at <paramref name="index"/>.
/// Supports the single-character escapes handled by <c>EscapedChar(char)</c>,
/// plus <c>\xHH</c> (two hex digits) and <c>\uHHHH</c> (four hex digits).
/// </summary>
/// <param name="commandLine">The command line being tokenized.</param>
/// <param name="index">The position of the backslash that starts the escape sequence.</param>
/// <returns>
/// The decoded character, and the number of characters following the backslash that
/// belong to the escape sequence (1 for a simple escape, 3 for <c>\xHH</c>, 5 for <c>\uHHHH</c>).
/// </returns>
/// <exception cref="ArgumentException">
/// The command line ends right after the backslash, there are too few characters after
/// <c>\x</c> / <c>\u</c>, or the characters in the digit positions are not valid hex.
/// </exception>
private static (char, int) EscapedChar(string commandLine, int index)
{
    index++; // step from the backslash to the character that selects the escape kind
    if (index >= commandLine.Length)
    {
        throw new ArgumentException("Invalid escape sequence. The command line ends with a backslash character.");
    }

    var kind = commandLine[index];

    // Number of hex digits required after the selector; 0 means "not a numeric escape".
    var digits = kind switch
    {
        'x' => 2,
        'u' => 4,
        _ => 0
    };

    if (digits == 0)
    {
        return (EscapedChar(kind), 1);
    }

    // The last required digit lives at index + digits, so that position must exist.
    if (index + digits >= commandLine.Length)
    {
        throw new ArgumentException($"Invalid escape sequence. Too few hex digits after \\{kind}");
    }

    // Hex parsing must not depend on the user's culture, so pass the invariant culture explicitly.
    // Two hex digits always fit in a ushort, so one parse call serves both \x and \u.
    var hex = commandLine.AsSpan(index + 1, digits);
    if (!ushort.TryParse(hex, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var code))
    {
        throw new ArgumentException($"Invalid hex digits after \\{kind}. " +
            "If you don't want to use escape character, please use backtick(`) to wrap the string.");
    }

    // Surrogate halves are passed through without pairing validation; a lone \uD83D yields
    // a lone surrogate char. Good enough for a CLI tool.
    return ((char)code, 1 + digits);
}

/// <summary>
/// Tokenize a command line
/// </summary>
Expand All @@ -61,13 +103,9 @@ public static List<CommandToken> Tokenize(this string commandLine)
var ch = commandLine[index];
if (ch == '\\' && quoteChar != CommandToken.NoEscapedChar)
{
index++;
if (index >= commandLine.Length)
{
throw new ArgumentException("Unexpected end of command line while processing escape sequence." +
" The command line ends with a backslash character.");
}
token.Append(EscapedChar(commandLine[index]));
(var escapedChar, var length) = EscapedChar(commandLine, index);
token.Append(escapedChar);
index += length;
}
else if (quoteChar != CommandToken.NoQuoteChar)
{
Expand Down
2 changes: 1 addition & 1 deletion src/Neo.ConsoleService/ConsoleServiceBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ 2. Indicator Arguments (Named Parameters)
- Boolean: Can be specified without a value (defaults to true), true/false, 1/0, yes/no, y/n
- Enum: Case-insensitive enum value names
- JSON: Input as JSON string
- Escape characters: \\, \", \', \n, \r, \t, \v, \b, \f, \a, \e, \0, \ (whitespace).
- Escape characters: \\, \", \', \n, \r, \t, \v, \b, \f, \a, \e, \0, \ (whitespace), \xHH, \uHHHH.
If want to input without escape, quote the value with backtick(`).
""");
}
Expand Down
101 changes: 101 additions & 0 deletions tests/Neo.ConsoleService.Tests/UT_CommandTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
// modifications are permitted.

using Microsoft.VisualStudio.TestTools.UnitTesting;
using System;

namespace Neo.ConsoleService.Tests
{
Expand Down Expand Up @@ -141,5 +142,105 @@ public void TestBackQuote()
Assert.AreEqual("123\"456", args[2].Value);
Assert.AreEqual("`123\"456`", args[2].RawValue);
}

[TestMethod]
public void TestUnicodeEscape()
{
// Test basic Unicode escape sequence
var cmd = "show \"\\u0041\""; // Should decode to 'A'
var args = CommandTokenizer.Tokenize(cmd);
Assert.AreEqual(3, args.Count);
Assert.AreEqual("show", args[0].Value);
Assert.AreEqual(" ", args[1].Value);
Assert.AreEqual("A", args[2].Value);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should test file paths, like /a/b/c or a\b\c.

Copy link
Contributor Author

@Wi1l-B0t Wi1l-B0t Aug 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An escaped Unicode character must start with \u or \U.

\uHHHH -> supported
\UHHHHHHHH -> not supported yet.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, can you add a test, just to make sure?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't hurt this test

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well can you add a test, just to make sure.

What tests?
I have already added some tests.

Copy link
Member

@cschuchardt88 cschuchardt88 Aug 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What tests? I have already added some tests.

Tests having file paths as arguments, because this could break the deploy command or open wallet.


// Test Unicode escape sequence for emoji
cmd = "show \"\\uD83D\\uDE00\""; // Should decode to 😀
args = CommandTokenizer.Tokenize(cmd); // surrogate pairs
Assert.AreEqual(3, args.Count);
Assert.AreEqual("show", args[0].Value);
Assert.AreEqual(" ", args[1].Value);
Assert.AreEqual("😀", args[2].Value);

// Test Unicode escape sequence in single quotes
cmd = "show '\\u0048\\u0065\\u006C\\u006C\\u006F'"; // Should decode to "Hello"
args = CommandTokenizer.Tokenize(cmd);
Assert.AreEqual(3, args.Count);
Assert.AreEqual("show", args[0].Value);
Assert.AreEqual(" ", args[1].Value);
Assert.AreEqual("Hello", args[2].Value);

cmd = "show '\\x48\\x65\\x6C\\x6C\\x6F'"; // Should decode to "Hello"
args = CommandTokenizer.Tokenize(cmd);
Assert.AreEqual(3, args.Count);
Assert.AreEqual("show", args[0].Value);
Assert.AreEqual(" ", args[1].Value);
Assert.AreEqual("Hello", args[2].Value);
}

[TestMethod]
public void TestUnicodeEscapeErrors()
{
    // Every entry contains a malformed \u escape that the tokenizer must reject.
    string[] malformed =
    {
        "show \"\\u123\"",  // incomplete: only three hex digits before the closing quote
        "show \"\\u12XY\"", // invalid: non-hex characters in the digit positions
        "show \"\\u",       // truncated: the input ends right after \u
    };

    foreach (var commandLine in malformed)
    {
        Assert.ThrowsExactly<ArgumentException>(() => CommandTokenizer.Tokenize(commandLine));
    }
}

[TestMethod]
public void TestUnicodeEdgeCases()
{
    // Each case pairs a command line with the value expected for its quoted argument.
    var cases = new (string CommandLine, string Expected)[]
    {
        ("show \"\\uD83D\"", "\uD83D"), // lone high surrogate
        ("show \"\\uDE00\"", "\uDE00"), // lone low surrogate
        ("show \"\\u0000\"", "\u0000"), // null character
        ("show \"\\uFFFF\"", "\uFFFF"), // largest value a \uHHHH escape can express
        // several escapes back to back
        ("show \"\\u0048\\u0065\\u006C\\u006C\\u006F\\u0020\\u0057\\u006F\\u0072\\u006C\\u0064\"", "Hello World"),
        // an escape embedded between ordinary characters
        ("show \"Hello\\u0020World\"", "Hello World"),
    };

    foreach (var (commandLine, expected) in cases)
    {
        var tokens = CommandTokenizer.Tokenize(commandLine);

        // Command name, separating whitespace, then the decoded argument.
        Assert.AreEqual(3, tokens.Count);
        Assert.AreEqual("show", tokens[0].Value);
        Assert.AreEqual(" ", tokens[1].Value);
        Assert.AreEqual(expected, tokens[2].Value);
    }
}
}
}
Loading