Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 45 additions & 7 deletions src/Neo.ConsoleService/CommandTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;

Expand Down Expand Up @@ -39,6 +40,47 @@ private static char EscapedChar(char ch)
};
}

/// <summary>
/// Decodes the escape sequence introduced by the backslash at <paramref name="index"/>.
/// Supports <c>\xHH</c> (two hex digits), <c>\uHHHH</c> (four hex digits) and the
/// single-character escapes handled by the <c>EscapedChar(char)</c> overload.
/// </summary>
/// <param name="commandLine">The command line being tokenized.</param>
/// <param name="index">The index of the backslash that starts the escape sequence.</param>
/// <returns>
/// The decoded character, and the number of characters after the backslash that belong to
/// the sequence: 1 for a single-character escape, 3 for <c>\xHH</c>, 5 for <c>\uHHHH</c>.
/// </returns>
/// <exception cref="ArgumentException">
/// The command line ends right after the backslash, there are too few characters after
/// <c>\x</c> / <c>\u</c>, or those characters are not valid hex digits.
/// </exception>
private static (char, int) EscapedChar(string commandLine, int index)
{
    index++; // next char after \
    if (index >= commandLine.Length)
    {
        throw new ArgumentException("Unexpected end of command line while processing escape sequence." +
            " The command line ends with a backslash character.");
    }

    var kind = commandLine[index];
    return kind switch
    {
        'x' => (ParseHex(commandLine, index, kind, 2), 1 + 2),
        // A \u escape yields one UTF-16 code unit; surrogate pairs are written as two
        // consecutive \u escapes and are not validated here.
        'u' => (ParseHex(commandLine, index, kind, 4), 1 + 4),
        _ => (EscapedChar(kind), 1),
    };

    // Parses exactly `digits` hex digits that follow the kind character ('x' or 'u') at `kindIndex`.
    static char ParseHex(string text, int kindIndex, char kindChar, int digits)
    {
        // The last hex digit sits at kindIndex + digits and must be inside the string.
        if (kindIndex + digits >= text.Length)
        {
            throw new ArgumentException("Unexpected end of command line while processing escape sequence." +
                $" Too few hex digits after \\{kindChar}");
        }

        // AllowHexSpecifier accepts hex digits only (no sign, no whitespace); the invariant
        // culture keeps parsing independent of the machine's regional settings.
        var hex = text.AsSpan(kindIndex + 1, digits);
        if (!ushort.TryParse(hex, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out var value))
            throw new ArgumentException($"Invalid hex digits after \\{kindChar}");

        return (char)value;
    }
}

/// <summary>
/// Tokenize a command line
/// </summary>
Expand All @@ -61,13 +103,9 @@ public static List<CommandToken> Tokenize(this string commandLine)
var ch = commandLine[index];
if (ch == '\\' && quoteChar != CommandToken.NoEscapedChar)
{
index++;
if (index >= commandLine.Length)
{
throw new ArgumentException("Unexpected end of command line while processing escape sequence." +
" The command line ends with a backslash character.");
}
token.Append(EscapedChar(commandLine[index]));
(var escapedChar, var length) = EscapedChar(commandLine, index);
token.Append(escapedChar);
index += length;
}
else if (quoteChar != CommandToken.NoQuoteChar)
{
Expand Down
2 changes: 1 addition & 1 deletion src/Neo.ConsoleService/ConsoleServiceBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ 2. Indicator Arguments (Named Parameters)
- Boolean: Can be specified without a value (defaults to true), true/false, 1/0, yes/no, y/n
- Enum: Case-insensitive enum value names
- JSON: Input as JSON string
- Escape characters: \\, \", \', \n, \r, \t, \v, \b, \f, \a, \e, \0, \ (whitespace).
- Escape characters: \\, \", \', \n, \r, \t, \v, \b, \f, \a, \e, \0, \ (whitespace), \xHH, \uHHHH.
If want to input without escape, quote the value with backtick(`).
""");
}
Expand Down
101 changes: 101 additions & 0 deletions tests/Neo.ConsoleService.Tests/UT_CommandTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
// modifications are permitted.

using Microsoft.VisualStudio.TestTools.UnitTesting;
using System;

namespace Neo.ConsoleService.Tests
{
Expand Down Expand Up @@ -141,5 +142,105 @@ public void TestBackQuote()
Assert.AreEqual("123\"456", args[2].Value);
Assert.AreEqual("`123\"456`", args[2].RawValue);
}

[TestMethod]
public void TestUnicodeEscape()
{
// Test basic Unicode escape sequence
var cmd = "show \"\\u0041\""; // Should decode to 'A'
var args = CommandTokenizer.Tokenize(cmd);
Assert.AreEqual(3, args.Count);
Assert.AreEqual("show", args[0].Value);
Assert.AreEqual(" ", args[1].Value);
Assert.AreEqual("A", args[2].Value);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should also test file paths, like /a/b/c or a\b\c.

Copy link
Contributor Author

@Wi1l-B0t Wi1l-B0t Aug 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Escaped unicode char must start with \u, \U

\uHHHH -> supported
\UHHHHHHHH -> not supported yet.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, can you add a test, just to make sure?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't hurt this test

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well can you add a test, just to make sure.

What tests?
I have already added some tests.

Copy link
Member

@cschuchardt88 cschuchardt88 Aug 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What tests? I have already added some tests.

Tests that pass file paths as arguments, because this change could break the deploy command or open wallet.


// Test Unicode escape sequence for emoji
cmd = "show \"\\uD83D\\uDE00\""; // Should decode to 😀
args = CommandTokenizer.Tokenize(cmd); // surrogate pairs
Assert.AreEqual(3, args.Count);
Assert.AreEqual("show", args[0].Value);
Assert.AreEqual(" ", args[1].Value);
Assert.AreEqual("😀", args[2].Value);

// Test Unicode escape sequence in single quotes
cmd = "show '\\u0048\\u0065\\u006C\\u006C\\u006F'"; // Should decode to "Hello"
args = CommandTokenizer.Tokenize(cmd);
Assert.AreEqual(3, args.Count);
Assert.AreEqual("show", args[0].Value);
Assert.AreEqual(" ", args[1].Value);
Assert.AreEqual("Hello", args[2].Value);

cmd = "show '\\x48\\x65\\x6C\\x6C\\x6F'"; // Should decode to "Hello"
args = CommandTokenizer.Tokenize(cmd);
Assert.AreEqual(3, args.Count);
Assert.AreEqual("show", args[0].Value);
Assert.AreEqual(" ", args[1].Value);
Assert.AreEqual("Hello", args[2].Value);
}

[TestMethod]
public void TestUnicodeEscapeErrors()
{
    // Every malformed \u or \x escape must be rejected with exactly ArgumentException.

    // Test incomplete Unicode escape sequence
    Assert.ThrowsExactly<ArgumentException>(() => CommandTokenizer.Tokenize("show \"\\u123\""));

    // Test invalid hex digits
    Assert.ThrowsExactly<ArgumentException>(() => CommandTokenizer.Tokenize("show \"\\u12XY\""));

    // Test Unicode escape at end of string
    Assert.ThrowsExactly<ArgumentException>(() => CommandTokenizer.Tokenize("show \"\\u"));

    // Test incomplete hex escape sequence (only one digit before the closing quote)
    Assert.ThrowsExactly<ArgumentException>(() => CommandTokenizer.Tokenize("show \"\\x4\""));

    // Test invalid hex digits after \x
    Assert.ThrowsExactly<ArgumentException>(() => CommandTokenizer.Tokenize("show \"\\xZZ\""));

    // Test hex escape at end of string
    Assert.ThrowsExactly<ArgumentException>(() => CommandTokenizer.Tokenize("show \"\\x"));
}

[TestMethod]
public void TestUnicodeEdgeCases()
{
    // Each case is a quoted argument passed to "show" and the value it must decode to.
    var cases = new (string Command, string Expected)[]
    {
        // Lone high surrogate is passed through unchanged
        ("show \"\\uD83D\"", "\uD83D"),
        // Lone low surrogate is passed through unchanged
        ("show \"\\uDE00\"", "\uDE00"),
        // Null character
        ("show \"\\u0000\"", "\u0000"),
        // Largest value a single \uHHHH escape can express
        ("show \"\\uFFFF\"", "\uFFFF"),
        // Several Unicode escapes back to back
        ("show \"\\u0048\\u0065\\u006C\\u006C\\u006F\\u0020\\u0057\\u006F\\u0072\\u006C\\u0064\"", "Hello World"),
        // Unicode escape mixed with regular characters
        ("show \"Hello\\u0020World\"", "Hello World"),
    };

    foreach (var (command, expected) in cases)
    {
        var tokens = CommandTokenizer.Tokenize(command);

        // Expect three tokens: the command name, the separating space, and the decoded argument.
        Assert.AreEqual(3, tokens.Count);
        Assert.AreEqual("show", tokens[0].Value);
        Assert.AreEqual(" ", tokens[1].Value);
        Assert.AreEqual(expected, tokens[2].Value);
    }
}
}
}
Loading