diff --git a/include/CLI/Config.hpp b/include/CLI/Config.hpp index d5c6bc347..c9809801b 100644 --- a/include/CLI/Config.hpp +++ b/include/CLI/Config.hpp @@ -26,7 +26,7 @@ namespace detail { std::string convert_arg_for_ini(const std::string &arg, char stringQuote = '"', - char characterQuote = '\'', + char literalQuote = '\'', bool disable_multi_line = false); /// Comma separated join, adds quotes if needed @@ -35,7 +35,7 @@ std::string ini_join(const std::vector &args, char arrayStart = '[', char arrayEnd = ']', char stringQuote = '"', - char characterQuote = '\''); + char literalQuote = '\''); std::vector generate_parents(const std::string &section, std::string &name, char parentSeparator); diff --git a/include/CLI/ConfigFwd.hpp b/include/CLI/ConfigFwd.hpp index 76fc9fa7e..fe4071f7a 100644 --- a/include/CLI/ConfigFwd.hpp +++ b/include/CLI/ConfigFwd.hpp @@ -92,8 +92,8 @@ class ConfigBase : public Config { char valueDelimiter = '='; /// the character to use around strings char stringQuote = '"'; - /// the character to use around single characters - char characterQuote = '\''; + /// the character to use around single characters and literal strings + char literalQuote = '\''; /// the maximum number of layers to allow uint8_t maximumLayers{255}; /// the separator used to separator parent layers @@ -132,7 +132,7 @@ class ConfigBase : public Config { /// Specify the quote characters used around strings and characters ConfigBase *quoteCharacter(char qString, char qChar) { stringQuote = qString; - characterQuote = qChar; + literalQuote = qChar; return this; } /// Specify the maximum number of parents diff --git a/include/CLI/StringTools.hpp b/include/CLI/StringTools.hpp index 2356a70af..1922d9bf8 100644 --- a/include/CLI/StringTools.hpp +++ b/include/CLI/StringTools.hpp @@ -120,6 +120,9 @@ inline std::string trim_copy(const std::string &str) { /// remove quotes at the front and back of a string either '"' or '\'' CLI11_INLINE std::string &remove_quotes(std::string &str); +/// 
remove quotes from all elements of a string vector and process escaped components +CLI11_INLINE void remove_quotes(std::vector &args); + /// Add a leader to the beginning of all new lines (nothing is added /// at the start of the first line). `"; "` would be for ini files /// @@ -212,9 +215,13 @@ template inline std::string find_and_modify(std::string str, return str; } +/// close a sequence of characters indicated by a closure character. Brackets allows sub sequences +/// recognized bracket sequences include "'`[(<{ other closure characters are assumed to be literal strings +CLI11_INLINE std::size_t close_sequence(const std::string &str, std::size_t start, char closure_char); + /// Split a string '"one two" "three"' into 'one two', 'three' /// Quote characters can be ` ' or " or bracket characters [{(< with matching to the matching bracket -CLI11_INLINE std::vector split_up(std::string str, char delimiter = '\0', bool removeQuotes = true); +CLI11_INLINE std::vector split_up(std::string str, char delimiter = '\0'); /// get the value of an environmental variable or empty string if empty CLI11_INLINE std::string get_environment_value(const std::string &env_name); @@ -246,6 +253,9 @@ CLI11_INLINE bool is_binary_escaped_string(const std::string &escaped_string); /// extract an escaped binary_string CLI11_INLINE std::string extract_binary_string(const std::string &escaped_string); +/// process a quoted string, remove the quotes and if appropriate handle escaped characters +CLI11_INLINE bool process_quoted_string(std::string &str, char string_char = '\"', char literal_char = '\''); + } // namespace detail // [CLI11:string_tools_hpp:end] diff --git a/include/CLI/impl/App_inl.hpp b/include/CLI/impl/App_inl.hpp index ee9bee336..f258f7308 100644 --- a/include/CLI/impl/App_inl.hpp +++ b/include/CLI/impl/App_inl.hpp @@ -579,6 +579,11 @@ CLI11_INLINE void App::parse(std::string commandline, bool program_name_included auto args = detail::split_up(std::move(commandline)); // 
remove all empty strings args.erase(std::remove(args.begin(), args.end(), std::string{}), args.end()); + try { + detail::remove_quotes(args); + } catch(const std::invalid_argument &arg) { + throw CLI::ParseError(arg.what(), CLI::ExitCodes::InvalidError); + } std::reverse(args.begin(), args.end()); parse(std::move(args)); } @@ -1569,7 +1574,7 @@ CLI11_INLINE bool App::_parse_single(std::vector &args, bool &posit case detail::Classifier::SHORT: case detail::Classifier::WINDOWS_STYLE: // If already parsed a subcommand, don't accept options_ - _parse_arg(args, classifier, false); + retval = _parse_arg(args, classifier, false); break; case detail::Classifier::NONE: // Probably a positional or something for a parent (sub)command diff --git a/include/CLI/impl/Config_inl.hpp b/include/CLI/impl/Config_inl.hpp index 2b98509cd..4723c5015 100644 --- a/include/CLI/impl/Config_inl.hpp +++ b/include/CLI/impl/Config_inl.hpp @@ -19,18 +19,19 @@ namespace CLI { // [CLI11:config_inl_hpp:verbatim] -static constexpr auto triple_quote = R"(""")"; +static constexpr auto multiline_literal_quote = R"(''')"; +static constexpr auto multiline_string_quote = R"(""")"; namespace detail { CLI11_INLINE bool is_printable(const std::string &test_string) { return std::all_of(test_string.begin(), test_string.end(), [](char x) { - return (isprint(static_cast(x)) != 0 || x == '\n'); + return (isprint(static_cast(x)) != 0 || x == '\n' || x == '\t'); }); } CLI11_INLINE std::string -convert_arg_for_ini(const std::string &arg, char stringQuote, char characterQuote, bool disable_multi_line) { +convert_arg_for_ini(const std::string &arg, char stringQuote, char literalQuote, bool disable_multi_line) { if(arg.empty()) { return std::string(2, stringQuote); } @@ -53,13 +54,10 @@ convert_arg_for_ini(const std::string &arg, char stringQuote, char characterQuot if(isprint(static_cast(arg.front())) == 0) { return binary_escape_string(arg); } - if(arg == "\\") { - return std::string(1, stringQuote) + "\\\\" + 
stringQuote; - } if(arg == "'") { return std::string(1, stringQuote) + "'" + stringQuote; } - return std::string(1, characterQuote) + arg + characterQuote; + return std::string(1, literalQuote) + arg + literalQuote; } // handle hex, binary or octal arguments if(arg.front() == '0') { @@ -82,13 +80,10 @@ convert_arg_for_ini(const std::string &arg, char stringQuote, char characterQuot if(!is_printable(arg)) { return binary_escape_string(arg); } - if(arg.find_first_of('\n') != std::string::npos) { - if(disable_multi_line) { - return binary_escape_string(arg); - } - return std::string(triple_quote) + arg + triple_quote; - } if(detail::has_escapable_character(arg)) { + if(arg.size() > 100 && !disable_multi_line) { + return std::string(multiline_literal_quote) + arg + multiline_literal_quote; + } return std::string(1, stringQuote) + detail::add_escaped_characters(arg) + stringQuote; } return std::string(1, stringQuote) + arg + stringQuote; @@ -99,7 +94,7 @@ CLI11_INLINE std::string ini_join(const std::vector &args, char arrayStart, char arrayEnd, char stringQuote, - char characterQuote) { + char literalQuote) { bool disable_multi_line{false}; std::string joined; if(args.size() > 1 && arrayStart != '\0') { @@ -114,7 +109,7 @@ CLI11_INLINE std::string ini_join(const std::vector &args, joined.push_back(' '); } } - joined.append(convert_arg_for_ini(arg, stringQuote, characterQuote, disable_multi_line)); + joined.append(convert_arg_for_ini(arg, stringQuote, literalQuote, disable_multi_line)); } if(args.size() > 1 && arrayEnd != '\0') { joined.push_back(arrayEnd); @@ -233,7 +228,7 @@ inline std::vector ConfigBase::from_config(std::istream &input) cons if(len < 3) { continue; } - if(line.compare(0, 3, triple_quote) == 0 || line.compare(0, 3, "'''") == 0) { + if(line.compare(0, 3, multiline_string_quote) == 0 || line.compare(0, 3, multiline_literal_quote) == 0) { inMLineComment = true; auto cchar = line.front(); while(inMLineComment) { @@ -277,19 +272,15 @@ inline std::vector 
ConfigBase::from_config(std::istream &input) cons // comment lines if(line.front() == ';' || line.front() == '#' || line.front() == commentChar) { - if(line.compare(2, 13, "cli11:literal") == 0) { - literalName = true; - getline(input, buffer); - line = detail::trim_copy(buffer); - } else { - continue; - } + continue; + } + std::size_t search_start = 0; + if(line.front() == stringQuote || line.front() == literalQuote || line.front() == '`') { + search_start = detail::close_sequence(line, 0, line.front()); } - // Find = in string, split and recombine - auto delimiter_pos = line.find_first_of(valueDelimiter, 1); - auto comment_pos = (literalName) ? std::string::npos : line.find_first_of(commentChar); - + auto delimiter_pos = line.find_first_of(valueDelimiter, search_start + 1); + auto comment_pos = line.find_first_of(commentChar, search_start); if(comment_pos < delimiter_pos) { delimiter_pos = std::string::npos; } @@ -297,9 +288,10 @@ inline std::vector ConfigBase::from_config(std::istream &input) cons name = detail::trim_copy(line.substr(0, delimiter_pos)); std::string item = detail::trim_copy(line.substr(delimiter_pos + 1, std::string::npos)); - bool mlquote = (item.compare(0, 3, "'''") == 0 || item.compare(0, 3, triple_quote) == 0); + bool mlquote = + (item.compare(0, 3, multiline_literal_quote) == 0 || item.compare(0, 3, multiline_string_quote) == 0); if(!mlquote && comment_pos != std::string::npos && !literalName) { - auto citems = detail::split_up(item, commentChar, false); + auto citems = detail::split_up(item, commentChar); item = detail::trim_copy(citems.front()); } if(mlquote) { @@ -337,6 +329,9 @@ inline std::vector ConfigBase::from_config(std::istream &input) cons if(!item.empty() && item.back() == '\n') { item.pop_back(); } + if(keyChar == '\"') { + item = detail::remove_escaped_characters(item); + } } else { if(lineExtension) { detail::trim(l2); @@ -358,11 +353,11 @@ inline std::vector ConfigBase::from_config(std::istream &input) cons 
detail::trim(multiline); item += multiline; } - items_buffer = detail::split_up(item.substr(1, item.length() - 2), aSep, false); + items_buffer = detail::split_up(item.substr(1, item.length() - 2), aSep); } else if((isDefaultArray || isINIArray) && item.find_first_of(aSep) != std::string::npos) { - items_buffer = detail::split_up(item, aSep, false); + items_buffer = detail::split_up(item, aSep); } else if((isDefaultArray || isINIArray) && item.find_first_of(' ') != std::string::npos) { - items_buffer = detail::split_up(item, '\0', false); + items_buffer = detail::split_up(item, '\0'); } else { items_buffer = {item}; } @@ -370,17 +365,15 @@ inline std::vector ConfigBase::from_config(std::istream &input) cons name = detail::trim_copy(line.substr(0, comment_pos)); items_buffer = {"true"}; } - if(name.find(parentSeparatorChar) == std::string::npos) { - if(!literalName) { - detail::remove_quotes(name); - } - } - // clean up quotes on the items and check for escaped strings - for(auto &it : items_buffer) { - detail::remove_quotes(it); - if(detail::is_binary_escaped_string(it)) { - it = detail::extract_binary_string(it); + try { + literalName = detail::process_quoted_string(name, stringQuote, literalQuote); + + // clean up quotes on the items and check for escaped strings + for(auto &it : items_buffer) { + detail::process_quoted_string(it, stringQuote, literalQuote); } + } catch(const std::invalid_argument &ia) { + throw CLI::ParseError(ia.what(), CLI::ExitCodes::InvalidError); } std::vector parents; if(literalName) { @@ -461,16 +454,17 @@ ConfigBase::to_config(const App *app, bool default_also, bool write_description, continue; } } - std::string name = prefix + opt->get_single_name(); - if(name == prefix) { + std::string single_name = opt->get_single_name(); + if(single_name.empty()) { continue; } + std::string value = detail::ini_join( - opt->reduced_results(), arraySeparator, arrayStart, arrayEnd, stringQuote, characterQuote); + opt->reduced_results(), arraySeparator, 
arrayStart, arrayEnd, stringQuote, literalQuote); if(value.empty() && default_also) { if(!opt->get_default_str().empty()) { - value = detail::convert_arg_for_ini(opt->get_default_str(), stringQuote, characterQuote, false); + value = detail::convert_arg_for_ini(opt->get_default_str(), stringQuote, literalQuote, false); } else if(opt->get_expected_min() == 0) { value = "false"; } else if(opt->get_run_callback_for_default()) { @@ -479,15 +473,16 @@ ConfigBase::to_config(const App *app, bool default_also, bool write_description, } if(!value.empty()) { + if(!opt->get_fnames().empty()) { try { - value = opt->get_flag_value(name, value); + value = opt->get_flag_value(single_name, value); } catch(const CLI::ArgumentMismatch &) { bool valid{false}; for(const auto &test_name : opt->get_fnames()) { try { value = opt->get_flag_value(test_name, value); - name = test_name; + single_name = test_name; valid = true; } catch(const CLI::ArgumentMismatch &) { continue; @@ -495,7 +490,7 @@ ConfigBase::to_config(const App *app, bool default_also, bool write_description, } if(!valid) { value = detail::ini_join( - opt->results(), arraySeparator, arrayStart, arrayEnd, stringQuote, characterQuote); + opt->results(), arraySeparator, arrayStart, arrayEnd, stringQuote, literalQuote); } } } @@ -503,13 +498,27 @@ ConfigBase::to_config(const App *app, bool default_also, bool write_description, out << '\n'; out << commentLead << detail::fix_newlines(commentLead, opt->get_description()) << '\n'; } - if(name.find_first_of(commentTest) != std::string::npos || name.compare(0, 3, triple_quote) == 0 || - name.compare(0, 3, "'''") == 0 || (name.front() == '[' && name.back() == ']') || - (name.front() == stringQuote && name.back() == stringQuote) || - (name.front() == characterQuote && name.back() == characterQuote) || - (name.front() == '`' && name.back() == '`')) { - out << commentChar << " cli11:literal\n"; + if(single_name.find_first_of(commentTest) != std::string::npos || + single_name.compare(0, 3, 
multiline_string_quote) == 0 || + single_name.compare(0, 3, multiline_literal_quote) == 0 || + (single_name.front() == '[' && single_name.back() == ']') || + (single_name.find_first_of(stringQuote) != std::string::npos) || + (single_name.find_first_of(literalQuote) != std::string::npos) || + (single_name.find_first_of('`') != std::string::npos)) { + if(single_name.find_first_of(literalQuote) == std::string::npos) { + single_name.insert(0, 1, literalQuote); + single_name.push_back(literalQuote); + } else { + if(detail::has_escapable_character(single_name)) { + single_name = detail::add_escaped_characters(single_name); + } + single_name.insert(0, 1, stringQuote); + single_name.push_back(stringQuote); + } } + + std::string name = prefix + single_name; + out << name << valueDelimiter << value << '\n'; } } diff --git a/include/CLI/impl/Option_inl.hpp b/include/CLI/impl/Option_inl.hpp index 2986b6bb0..987f39987 100644 --- a/include/CLI/impl/Option_inl.hpp +++ b/include/CLI/impl/Option_inl.hpp @@ -609,7 +609,12 @@ CLI11_INLINE void Option::_reduce_results(results_t &out, const results_t &origi throw ArgumentMismatch::AtLeast(get_name(), static_cast(num_min), original.size()); } if(original.size() > num_max) { - throw ArgumentMismatch::AtMost(get_name(), static_cast(num_max), original.size()); + if(original.size() == 2 && num_max == 1 && original[1] == "%%" && original[0] == "{}") { + // this condition is a trap for the following empty indicator check on config files + out = original; + } else { + throw ArgumentMismatch::AtMost(get_name(), static_cast(num_max), original.size()); + } } break; } diff --git a/include/CLI/impl/StringTools_inl.hpp b/include/CLI/impl/StringTools_inl.hpp index 40028726d..4f186cc95 100644 --- a/include/CLI/impl/StringTools_inl.hpp +++ b/include/CLI/impl/StringTools_inl.hpp @@ -61,7 +61,17 @@ CLI11_INLINE std::string &rtrim(std::string &str, const std::string &filter) { } CLI11_INLINE std::string &remove_quotes(std::string &str) { - if(str.length() 
> 1 && (str.front() == '"' || str.front() == '\'')) { + if(str.length() > 1 && (str.front() == '"' || str.front() == '\'' || str.front() == '`')) { + if(str.front() == str.back()) { + str.pop_back(); + str.erase(str.begin(), str.begin() + 1); + } + } + return str; +} + +CLI11_INLINE std::string &remove_outer(std::string &str, char key) { + if(str.length() > 1 && (str.front() == key)) { if(str.front() == str.back()) { str.pop_back(); str.erase(str.begin(), str.begin() + 1); @@ -181,9 +191,10 @@ find_member(std::string name, const std::vector names, bool ignore_ return (it != std::end(names)) ? (it - std::begin(names)) : (-1); } -static const std::string escapedChars("'\"`])>}\\"); -static const std::string bracketChars{"'\"`[(<{"}; -static const std::string matchBracketChars("'\"`])>}"); +static const std::string escapedChars("\b\t\n\f\r\"\\"); +static const std::string escapedCharsCode("btnfr\"\\"); +static const std::string bracketChars{"\"'`[(<{"}; +static const std::string matchBracketChars("\"'`])>}"); CLI11_INLINE bool has_escapable_character(const std::string &str) { return (str.find_first_of(escapedChars) != std::string::npos); @@ -193,25 +204,109 @@ CLI11_INLINE std::string add_escaped_characters(const std::string &str) { std::string out; out.reserve(str.size() + 4); for(char s : str) { - if(escapedChars.find_first_of(s) != std::string::npos) { + auto sloc = escapedChars.find_first_of(s); + if(sloc != std::string::npos) { out.push_back('\\'); + out.push_back(escapedCharsCode[sloc]); + } else { + out.push_back(s); } - out.push_back(s); } return out; } +CLI11_INLINE std::uint32_t hexConvert(char hc) { + int hcode{0}; + if(hc >= '0' && hc <= '9') { + hcode = (hc - '0'); + } else if(hc >= 'A' && hc <= 'F') { + hcode = (hc - 'A' + 10); + } else if(hc >= 'a' && hc <= 'f') { + hcode = (hc - 'a' + 10); + } else { + hcode = -1; + } + return static_cast(hcode); +} + +CLI11_INLINE char make_char(std::uint32_t code) { return static_cast(static_cast(code)); } + 
+CLI11_INLINE void append_codepoint(std::string &str, std::uint32_t code) { + if(code < 0x80) { // ascii code equivalent + str.push_back(static_cast(code)); + } else if(code < 0x800) { // \u0080 to \u07FF + // 110yyyyx 10xxxxxx; 0x3f == 0b0011'1111 + str.push_back(make_char(0xC0 | code >> 6)); + str.push_back(make_char(0x80 | (code & 0x3F))); + } else if(code < 0x10000) { // U+0800...U+FFFF + if(0xD800 <= code && code <= 0xDFFF) { + throw std::invalid_argument("[0xD800, 0xDFFF] are not valid UTF-8."); + } + // 1110yyyy 10yxxxxx 10xxxxxx + str.push_back(make_char(0xE0 | code >> 12)); + str.push_back(make_char(0x80 | (code >> 6 & 0x3F))); + str.push_back(make_char(0x80 | (code & 0x3F))); + } else if(code < 0x110000) { // U+010000 ... U+10FFFF + // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx + str.push_back(make_char(0xF0 | code >> 18)); + str.push_back(make_char(0x80 | (code >> 12 & 0x3F))); + str.push_back(make_char(0x80 | (code >> 6 & 0x3F))); + str.push_back(make_char(0x80 | (code & 0x3F))); + } +} + CLI11_INLINE std::string remove_escaped_characters(const std::string &str) { std::string out; out.reserve(str.size()); for(auto loc = str.begin(); loc < str.end(); ++loc) { if(*loc == '\\') { - if(escapedChars.find_first_of(*(loc + 1)) != std::string::npos) { - out.push_back(*(loc + 1)); + if(str.end() - loc < 2) { + throw std::invalid_argument("invalid escape sequence " + str); + } + auto ecloc = escapedCharsCode.find_first_of(*(loc + 1)); + if(ecloc != std::string::npos) { + out.push_back(escapedChars[ecloc]); + ++loc; + } else if(*(loc + 1) == 'u') { + // must have 4 hex characters + if(str.end() - loc < 6) { + throw std::invalid_argument("unicode sequence must have 4 hex codes " + str); + } + std::uint32_t code{0}; + std::uint32_t mplier{16 * 16 * 16}; + for(int ii = 2; ii < 6; ++ii) { + std::uint32_t res = hexConvert(*(loc + ii)); + if(res > 0x0F) { + throw std::invalid_argument("unicode sequence must have 4 hex codes " + str); + } + code += res * mplier; + mplier = 
mplier / 16; + } + append_codepoint(out, code); + loc += 5; + } else if(*(loc + 1) == 'U') { + // must have 8 hex characters + if(str.end() - loc < 10) { + throw std::invalid_argument("unicode sequence must have 8 hex codes " + str); + } + std::uint32_t code{0}; + std::uint32_t mplier{16 * 16 * 16 * 16 * 16 * 16 * 16}; + for(int ii = 2; ii < 10; ++ii) { + std::uint32_t res = hexConvert(*(loc + ii)); + if(res > 0x0F) { + throw std::invalid_argument("unicode sequence must have 8 hex codes " + str); + } + code += res * mplier; + mplier = mplier / 16; + } + append_codepoint(out, code); + loc += 9; + } else if(*(loc + 1) == '0') { + out.push_back('\0'); ++loc; } else { - out.push_back(*loc); + throw std::invalid_argument(std::string("unrecognized escape sequence \\") + *(loc + 1) + " in " + str); } } else { out.push_back(*loc); @@ -220,39 +315,73 @@ CLI11_INLINE std::string remove_escaped_characters(const std::string &str) { return out; } -CLI11_INLINE std::pair close_sequence(const std::string &str, std::size_t start, char closure_char) { - std::string closures; - closures.push_back(closure_char); +CLI11_INLINE std::size_t close_string_quote(const std::string &str, std::size_t start, char closure_char) { + std::size_t loc{0}; + for(loc = start + 1; loc < str.size(); ++loc) { + if(str[loc] == closure_char) { + break; + } + if(str[loc] == '\\') { + // skip the next character for escaped sequences + ++loc; + } + } + return loc; +} + +CLI11_INLINE std::size_t close_literal_quote(const std::string &str, std::size_t start, char closure_char) { + auto loc = str.find_first_of(closure_char, start + 1); + return (loc != std::string::npos ? 
loc : str.size()); +} + +CLI11_INLINE std::size_t close_sequence(const std::string &str, std::size_t start, char closure_char) { + + auto bracket_loc = matchBracketChars.find(closure_char); + switch(bracket_loc) { + case 0: + return close_string_quote(str, start, closure_char); + case 1: + case 2: + case std::string::npos: + return close_literal_quote(str, start, closure_char); + default: + break; + } + + std::string closures(1, closure_char); auto loc = start + 1; - bool inQuote = closure_char == '"' || closure_char == '\'' || closure_char == '`'; - bool hasControlSequence{false}; + while(loc < str.size()) { if(str[loc] == closures.back()) { closures.pop_back(); if(closures.empty()) { - return {loc, hasControlSequence}; - } - inQuote = false; - } - if(str[loc] == '\\') { - if(inQuote) { - hasControlSequence = true; + return loc; } - ++loc; } - if(!inQuote) { - auto bracket_loc = bracketChars.find(str[loc]); - if(bracket_loc != std::string::npos) { + bracket_loc = bracketChars.find(str[loc]); + if(bracket_loc != std::string::npos) { + switch(bracket_loc) { + case 0: + loc = close_string_quote(str, loc, str[loc]); + break; + case 1: + case 2: + loc = close_literal_quote(str, loc, str[loc]); + break; + default: closures.push_back(matchBracketChars[bracket_loc]); - inQuote = (bracket_loc <= 2); + break; } } ++loc; } - return {loc, hasControlSequence}; + if(loc > str.size()) { + loc = str.size(); + } + return loc; } -CLI11_INLINE std::vector split_up(std::string str, char delimiter, bool removeQuotes) { +CLI11_INLINE std::vector split_up(std::string str, char delimiter) { auto find_ws = [delimiter](char ch) { return (delimiter == '\0') ? std::isspace(ch, std::locale()) : (ch == delimiter); @@ -260,20 +389,22 @@ CLI11_INLINE std::vector split_up(std::string str, char delimiter, trim(str); std::vector output; - bool embeddedQuote = false; - std::size_t adjust = removeQuotes ? 
1 : 0; while(!str.empty()) { if(bracketChars.find_first_of(str[0]) != std::string::npos) { auto bracketLoc = bracketChars.find_first_of(str[0]); - auto closure = close_sequence(str, 0, matchBracketChars[bracketLoc]); - auto end = closure.first; - output.push_back(str.substr(adjust, end + 1 - 2 * adjust)); - if(end + 2 < str.size()) { - str = str.substr(end + 2); - } else { + auto end = close_sequence(str, 0, matchBracketChars[bracketLoc]); + if(end >= str.size()) { + output.push_back(std::move(str)); str.clear(); + } else { + output.push_back(str.substr(0, end + 1)); + if(end + 2 < str.size()) { + str = str.substr(end + 2); + } else { + str.clear(); + } } - embeddedQuote = embeddedQuote || closure.second; + } else { auto it = std::find_if(std::begin(str), std::end(str), find_ws); if(it != std::end(str)) { @@ -285,11 +416,6 @@ CLI11_INLINE std::vector split_up(std::string str, char delimiter, str.clear(); } } - // transform any embedded quotes into the regular character if the quotes are removed - if(embeddedQuote && removeQuotes) { - output.back() = remove_escaped_characters(output.back()); - embeddedQuote = false; - } trim(str); } return output; @@ -373,30 +499,12 @@ CLI11_INLINE std::string extract_binary_string(const std::string &escaped_string if(escaped_string[loc] == '\\' && (escaped_string[loc + 1] == 'x' || escaped_string[loc + 1] == 'X')) { auto c1 = escaped_string[loc + 2]; auto c2 = escaped_string[loc + 3]; - int res{0}; - bool invalid{false}; - if(c1 >= '0' && c1 <= '9') { - res = (c1 - '0') * 16; - } else if(c1 >= 'A' && c1 <= 'F') { - res = (c1 - 'A' + 10) * 16; - } else if(c1 >= 'a' && c1 <= 'f') { - res = (c1 - 'a' + 10) * 16; - } else { - invalid = true; - } - if(c2 >= '0' && c2 <= '9') { - res += (c2 - '0'); - } else if(c2 >= 'A' && c2 <= 'F') { - res += (c2 - 'A' + 10); - } else if(c2 >= 'a' && c2 <= 'f') { - res += (c2 - 'a' + 10); - } else { - invalid = true; - } - if(!invalid) { + std::uint32_t res1 = hexConvert(c1); + std::uint32_t res2 = 
hexConvert(c2); + if(res1 <= 0x0F && res2 <= 0x0F) { loc += 4; - outstring.push_back(static_cast(res)); + outstring.push_back(static_cast(res1 * 16 + res2)); continue; } } @@ -406,6 +514,40 @@ CLI11_INLINE std::string extract_binary_string(const std::string &escaped_string return outstring; } +CLI11_INLINE void remove_quotes(std::vector &args) { + for(auto &arg : args) { + if(arg.front() == '\"' && arg.back() == '\"') { + remove_quotes(arg); + // only remove escaped for string arguments not literal strings + arg = remove_escaped_characters(arg); + } else { + remove_quotes(arg); + } + } +} + +CLI11_INLINE bool process_quoted_string(std::string &str, char string_char, char literal_char) { + if(str.size() <= 1) { + return false; + } + if(detail::is_binary_escaped_string(str)) { + str = detail::extract_binary_string(str); + return true; + } + if(str.front() == string_char && str.back() == string_char) { + detail::remove_outer(str, string_char); + if(str.find_first_of('\\') != std::string::npos) { + str = detail::remove_escaped_characters(str); + } + return true; + } + if((str.front() == literal_char || str.front() == '`') && str.back() == str.front()) { + detail::remove_outer(str, str.front()); + return true; + } + return false; +} + std::string get_environment_value(const std::string &env_name) { char *buffer = nullptr; std::string ename_string; diff --git a/tests/AppTest.cpp b/tests/AppTest.cpp index 2d753527d..2839ff904 100644 --- a/tests/AppTest.cpp +++ b/tests/AppTest.cpp @@ -414,10 +414,10 @@ TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleStringQuotedEscapedCharacters app.add_option("-s,--string", str); app.add_option("-t,--tstr", str2); app.add_option("-m,--mstr", str3); - app.parse(R"raw(--string="this is my \"quoted\" string" -t 'qst\'ring 2' -m=`"quoted\` string"`")raw"); - CHECK("this is my \"quoted\" string" == str); - CHECK("qst\'ring 2" == str2); - CHECK("\"quoted` string\"" == str3); + app.parse(R"raw(--string="this is my \n\"quoted\" string" -t 
'qst\ring 2' -m=`"quoted\n string"`")raw"); + CHECK("this is my \n\"quoted\" string" == str); // escaped + CHECK("qst\\ring 2" == str2); // literal + CHECK("\"quoted\\n string\"" == str3); // double quoted literal } TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleStringQuotedMultipleWithEqual", "[app]") { diff --git a/tests/ConfigFileTest.cpp b/tests/ConfigFileTest.cpp index d59ca0eb2..55ceac2cd 100644 --- a/tests/ConfigFileTest.cpp +++ b/tests/ConfigFileTest.cpp @@ -27,6 +27,15 @@ TEST_CASE("StringBased: convert_arg_for_ini", "[config]") { CHECK("-22E14" == CLI::detail::convert_arg_for_ini("-22E14")); CHECK("'a'" == CLI::detail::convert_arg_for_ini("a")); + + CHECK("'\\'" == CLI::detail::convert_arg_for_ini("\\")); + + CHECK("\"'\"" == CLI::detail::convert_arg_for_ini("'")); + + std::string tstring1; + tstring1.push_back('\0'); + // binary string conversion single character + CHECK("'B\"(\\x00)\"'" == CLI::detail::convert_arg_for_ini(tstring1)); // hex CHECK("0x5461FAED" == CLI::detail::convert_arg_for_ini("0x5461FAED")); // hex fail @@ -2713,7 +2722,8 @@ TEST_CASE_METHOD(TApp, "TomlOutputMultilineString", "[config]") { std::string desc = "flag"; app.add_option("--opt", desc); - std::string argString = "this is a very long string \n that covers multiple lines \n and should be long"; + std::string argString = "this is a very long string \n that covers multiple lines \nand should be longer than 100 " + "characters \nto trigger the multiline string"; args = {"--opt", argString}; run(); diff --git a/tests/FuzzFailTest.cpp b/tests/FuzzFailTest.cpp index fd6af2108..39e37fc5f 100644 --- a/tests/FuzzFailTest.cpp +++ b/tests/FuzzFailTest.cpp @@ -50,7 +50,7 @@ TEST_CASE("file_fail") { CLI::FuzzApp fuzzdata; auto app = fuzzdata.generateApp(); - int index = GENERATE(range(1, 3)); + int index = GENERATE(range(1, 5)); auto parseData = loadFailureFile("fuzz_file_fail", index); std::stringstream out(parseData); try { @@ -63,7 +63,7 @@ TEST_CASE("app_file_gen_fail") { 
CLI::FuzzApp fuzzdata; auto app = fuzzdata.generateApp(); - int index = GENERATE(range(1, 33)); + int index = GENERATE(range(1, 40)); std::string optionString, flagString; auto parseData = loadFailureFile("fuzz_app_file_fail", index); if(parseData.size() > 25) { diff --git a/tests/HelpersTest.cpp b/tests/HelpersTest.cpp index 82b972552..4397b1d4e 100644 --- a/tests/HelpersTest.cpp +++ b/tests/HelpersTest.cpp @@ -165,6 +165,7 @@ TEST_CASE("String: InvalidName", "[helpers]") { CHECK(CLI::detail::valid_name_string("b@d2?")); CHECK(CLI::detail::valid_name_string("2vali?d")); CHECK_FALSE(CLI::detail::valid_name_string("!valid")); + CHECK_FALSE(CLI::detail::valid_name_string("!va\nlid")); } TEST_CASE("StringTools: Modify", "[helpers]") { @@ -250,6 +251,11 @@ TEST_CASE("StringTools: binaryEscapseConversion", "[helpers]") { std::string rstring = CLI::detail::extract_binary_string(estring); CHECK(rstring == testString2); + CLI::detail::remove_quotes(estring); + CHECK(CLI::detail::is_binary_escaped_string(estring)); + std::string rstringrq = CLI::detail::extract_binary_string(estring); + CHECK(rstringrq == testString2); + testString2.push_back(0); testString2.push_back(static_cast(197)); testString2.push_back(78); @@ -272,11 +278,13 @@ TEST_CASE("StringTools: binaryStrings", "[helpers]") { CHECK(CLI::detail::extract_binary_string(rstring).empty()); rstring = "B\"(\\x35\\xa7)\""; + CHECK(CLI::detail::is_binary_escaped_string(rstring)); auto result = CLI::detail::extract_binary_string(rstring); CHECK(result[0] == static_cast(0x35)); CHECK(result[1] == static_cast(0xa7)); - rstring = "B\"(\\x3e\\xf7)\""; + rstring = "'B\"(\\x3e\\xf7)\"'"; + CHECK(CLI::detail::is_binary_escaped_string(rstring)); result = CLI::detail::extract_binary_string(rstring); CHECK(result[0] == static_cast(0x3e)); CHECK(result[1] == static_cast(0xf7)); @@ -300,12 +308,126 @@ TEST_CASE("StringTools: binaryStrings", "[helpers]") { CHECK(result == "\\XEM\\X7K"); } +/// these are provided for compatibility 
with the char8_t for C++20 that breaks stuff +std::string from_u8string(const std::string &s) { return s; } +std::string from_u8string(std::string &&s) { return std::move(s); } +#if defined(__cpp_lib_char8_t) +std::string from_u8string(const std::u8string &s) { return std::string(s.begin(), s.end()); } +#elif defined(__cpp_char8_t) +std::string from_u8string(const char8_t *s) { return std::string(reinterpret_cast(s)); } +#endif + TEST_CASE("StringTools: escapeConversion", "[helpers]") { CHECK(CLI::detail::remove_escaped_characters("test\\\"") == "test\""); - CHECK(CLI::detail::remove_escaped_characters("test\\}") == "test}"); CHECK(CLI::detail::remove_escaped_characters("test\\\\") == "test\\"); - CHECK(CLI::detail::remove_escaped_characters("test\\\\") == "test\\"); - CHECK(CLI::detail::remove_escaped_characters("test\\k") == "test\\k"); + CHECK(CLI::detail::remove_escaped_characters("test\\b") == "test\b"); + CHECK(CLI::detail::remove_escaped_characters("test\\t") == "test\t"); + CHECK(CLI::detail::remove_escaped_characters("test\\n\\r\\t\\f") == "test\n\r\t\f"); + CHECK(CLI::detail::remove_escaped_characters("test\\r") == "test\r"); + CHECK(CLI::detail::remove_escaped_characters("test\\f") == "test\f"); + std::string zstring = "test"; + zstring.push_back('\0'); + zstring.append("test\n"); + CHECK(CLI::detail::remove_escaped_characters("test\\0test\\n") == zstring); + + CHECK_THROWS_AS(CLI::detail::remove_escaped_characters("test\\m_bad"), std::invalid_argument); + CHECK_THROWS_AS(CLI::detail::remove_escaped_characters("test\\"), std::invalid_argument); +} + +TEST_CASE("StringTools: quotedString", "[helpers]") { + + std::string rstring = "'B\"(\\x35\\xa7)\"'"; + auto s2 = rstring; + CLI::detail::process_quoted_string(s2); + CHECK(s2[0] == static_cast(0x35)); + CHECK(s2[1] == static_cast(0xa7)); + s2 = rstring; + CLI::detail::remove_quotes(s2); + CLI::detail::process_quoted_string(s2); + CHECK(s2[0] == static_cast(0x35)); + CHECK(s2[1] == static_cast(0xa7)); + + 
std::string qbase = R"("this\nis\na\nfour\tline test")"; + std::string qresult = "this\nis\na\nfour\tline test"; + + std::string q1 = qbase; + + // test remove quotes and escape processing + CLI::detail::process_quoted_string(q1); + CHECK(q1 == qresult); + + std::string q2 = qbase; + q2.front() = '\''; + q2.pop_back(); + q2.push_back('\''); + std::string qliteral = qbase.substr(1); + qliteral.pop_back(); + + // test remove quotes for literal string + CHECK(CLI::detail::process_quoted_string(q2)); + CHECK(q2 == qliteral); + + std::string q3 = qbase; + q3.front() = '`'; + q3.pop_back(); + q3.push_back('`'); + + // test remove quotes for literal string + CHECK(CLI::detail::process_quoted_string(q3)); + CHECK(q3 == qliteral); + + std::string q4 = qbase; + q4.front() = '|'; + q4.pop_back(); + q4.push_back('|'); + + // check that it doesn't process + CHECK_FALSE(CLI::detail::process_quoted_string(q4)); + // test custom string quote character + CHECK(CLI::detail::process_quoted_string(q4, '|')); + CHECK(q4 == qresult); + + std::string q5 = qbase; + q5.front() = '?'; + q5.pop_back(); + q5.push_back('?'); + + // test custom literal quote character + CHECK(CLI::detail::process_quoted_string(q5, '|', '?')); + CHECK(q5 == qliteral); + + q3 = qbase; + q3.front() = '`'; + q3.pop_back(); + q3.push_back('`'); + + // test that '`' still works regardless of the other specified characters + CHECK(CLI::detail::process_quoted_string(q3)); + CHECK(q3 == qliteral); +} + +TEST_CASE("StringTools: unicode_literals", "[helpers]") { + + CHECK(CLI::detail::remove_escaped_characters("test\\u03C0\\u00e9") == from_u8string(u8"test\u03C0\u00E9")); + CHECK(CLI::detail::remove_escaped_characters("test\\u73C0\\u0057") == from_u8string(u8"test\u73C0\u0057")); + + CHECK(CLI::detail::remove_escaped_characters("test\\U0001F600\\u00E9") == from_u8string(u8"test\U0001F600\u00E9")); + + CHECK_THROWS_AS(CLI::detail::remove_escaped_characters("test\\U0001M600\\u00E9"), std::invalid_argument); + 
CHECK_THROWS_AS(CLI::detail::remove_escaped_characters("test\\U0001E600\\u00M9"), std::invalid_argument); + CHECK_THROWS_AS(CLI::detail::remove_escaped_characters("test\\U0001E600\\uD8E9"), std::invalid_argument); + + CHECK_THROWS_AS(CLI::detail::remove_escaped_characters("test\\U0001E600\\uD8"), std::invalid_argument); + CHECK_THROWS_AS(CLI::detail::remove_escaped_characters("test\\U0001E60"), std::invalid_argument); +} + +TEST_CASE("StringTools: close_sequence", "[helpers]") { + CHECK(CLI::detail::close_sequence("[test]", 0, ']') == 5U); + CHECK(CLI::detail::close_sequence("[\"test]\"]", 0, ']') == 8U); + CHECK(CLI::detail::close_sequence("[\"test]\"],[t2]", 0, ']') == 8U); + CHECK(CLI::detail::close_sequence("[\"test]\"],[t2]", 10, ']') == 13U); + CHECK(CLI::detail::close_sequence("{\"test]\"],[t2]", 0, '}') == 14U); + CHECK(CLI::detail::close_sequence("[(),(),{},\"]]52{}\",[],[54],[[],[],()]]", 0, ']') == 37U); } TEST_CASE("Trim: Various", "[helpers]") { @@ -967,35 +1089,35 @@ TEST_CASE("Join: Backward", "[helpers]") { } TEST_CASE("SplitUp: Simple", "[helpers]") { - std::vector oput = {"one", "two three"}; + std::vector oput = {"one", "\"two three\""}; std::string orig{R"(one "two three")"}; std::vector result = CLI::detail::split_up(orig); CHECK(result == oput); } TEST_CASE("SplitUp: SimpleDifferentQuotes", "[helpers]") { - std::vector oput = {"one", "two three"}; + std::vector oput = {"one", "`two three`"}; std::string orig{R"(one `two three`)"}; std::vector result = CLI::detail::split_up(orig); CHECK(result == oput); } TEST_CASE("SplitUp: SimpleMissingQuotes", "[helpers]") { - std::vector oput = {"one", "two three"}; + std::vector oput = {"one", "`two three"}; std::string orig{R"(one `two three)"}; std::vector result = CLI::detail::split_up(orig); CHECK(result == oput); } TEST_CASE("SplitUp: SimpleMissingQuotesEscaped", "[helpers]") { - std::vector oput = {"one", "two three`"}; - std::string orig{R"(one `two three\`)"}; + std::vector oput = {"one", R"("two 
three\"")"}; + std::string orig{R"(one "two three\"")"}; std::vector result = CLI::detail::split_up(orig); CHECK(result == oput); } TEST_CASE("SplitUp: SimpleDifferentQuotes2", "[helpers]") { - std::vector oput = {"one", "two three"}; + std::vector oput = {"one", "'two three'"}; std::string orig{R"(one 'two three')"}; std::vector result = CLI::detail::split_up(orig); CHECK(result == oput); @@ -1004,59 +1126,59 @@ TEST_CASE("SplitUp: SimpleDifferentQuotes2", "[helpers]") { TEST_CASE("SplitUp: Bracket1", "[helpers]") { std::vector oput = {"one", "[two, three]"}; std::string orig{"one, [two, three]"}; - std::vector result = CLI::detail::split_up(orig, ',', false); + std::vector result = CLI::detail::split_up(orig, ','); CHECK(result == oput); } TEST_CASE("SplitUp: Bracket2", "[helpers]") { std::vector oput = {"one", ""}; std::string orig{"one, "}; - std::vector result = CLI::detail::split_up(orig, ',', false); + std::vector result = CLI::detail::split_up(orig, ','); CHECK(result == oput); } TEST_CASE("SplitUp: Bracket3", "[helpers]") { std::vector oput = {"one", "(two, three)"}; std::string orig{"one, (two, three)"}; - std::vector result = CLI::detail::split_up(orig, ',', false); + std::vector result = CLI::detail::split_up(orig, ','); CHECK(result == oput); } TEST_CASE("SplitUp: Bracket4", "[helpers]") { std::vector oput = {"one", "{two, three}"}; std::string orig{"one, {two, three}"}; - std::vector result = CLI::detail::split_up(orig, ',', false); + std::vector result = CLI::detail::split_up(orig, ','); CHECK(result == oput); } TEST_CASE("SplitUp: Comment", "[helpers]") { std::vector oput = {R"(["quote1", "#"])"}; std::string orig{R"(["quote1", "#"])"}; - std::vector result = CLI::detail::split_up(orig, '#', false); + std::vector result = CLI::detail::split_up(orig, '#'); CHECK(result == oput); } TEST_CASE("SplitUp: Layered", "[helpers]") { - std::vector output = {R"(one 'two three')"}; + std::vector output = {R"("one 'two three'")"}; std::string orig{R"("one 'two 
three'")"}; std::vector result = CLI::detail::split_up(orig); CHECK(result == output); } TEST_CASE("SplitUp: Spaces", "[helpers]") { - std::vector oput = {"one", " two three"}; + std::vector oput = {"one", "\" two three\""}; std::string orig{R"( one " two three" )"}; std::vector result = CLI::detail::split_up(orig); CHECK(result == oput); } TEST_CASE("SplitUp: BadStrings", "[helpers]") { - std::vector oput = {"one", " two three"}; + std::vector oput = {"one", "\" two three"}; std::string orig{R"( one " two three )"}; std::vector result = CLI::detail::split_up(orig); CHECK(result == oput); - oput = {"one", " two three"}; + oput = {"one", "' two three"}; orig = R"( one ' two three )"; result = CLI::detail::split_up(orig); CHECK(result == oput); diff --git a/tests/fuzzFail/fuzz_app_file_fail33 b/tests/fuzzFail/fuzz_app_file_fail33 new file mode 100644 index 000000000..18e61dff6 --- /dev/null +++ b/tests/fuzzFail/fuzz_app_file_fail33 @@ -0,0 +1,2 @@ +'''-$ú +$ diff --git a/tests/fuzzFail/fuzz_app_file_fail34 b/tests/fuzzFail/fuzz_app_file_fail34 new file mode 100644 index 000000000..297cbdccd --- /dev/null +++ b/tests/fuzzFail/fuzz_app_file_fail34 @@ -0,0 +1 @@ +" (\\\,"‚ãã diff --git a/tests/fuzzFail/fuzz_app_file_fail35 b/tests/fuzzFail/fuzz_app_file_fail35 new file mode 100644 index 000000000..d9b5aa7c4 --- /dev/null +++ b/tests/fuzzFail/fuzz_app_file_fail35 @@ -0,0 +1 @@ +'^^^^^^^\^^^^^^''''''@''i¦ diff --git a/tests/fuzzFail/fuzz_app_file_fail36 b/tests/fuzzFail/fuzz_app_file_fail36 new file mode 100644 index 000000000..ddd11facc --- /dev/null +++ b/tests/fuzzFail/fuzz_app_file_fail36 @@ -0,0 +1 @@ +"\ " diff --git a/tests/fuzzFail/fuzz_app_file_fail37 b/tests/fuzzFail/fuzz_app_file_fail37 new file mode 100644 index 000000000..25d8567d6 --- /dev/null +++ b/tests/fuzzFail/fuzz_app_file_fail37 @@ -0,0 +1 @@ +"Ü-t2ÿÿÿÿp'--vopt1'â''e#ÿÿ'â''e diff --git a/tests/fuzzFail/fuzz_app_file_fail38 b/tests/fuzzFail/fuzz_app_file_fail38 new file mode 100644 index 
000000000..981220297 --- /dev/null +++ b/tests/fuzzFail/fuzz_app_file_fail38 @@ -0,0 +1 @@ +ParseErrorEF'' --vo-d{} diff --git a/tests/fuzzFail/fuzz_app_file_fail39 b/tests/fuzzFail/fuzz_app_file_fail39 new file mode 100644 index 000000000..991c5c3bd --- /dev/null +++ b/tests/fuzzFail/fuzz_app_file_fail39 @@ -0,0 +1 @@ +[--' diff --git a/tests/fuzzFail/fuzz_file_fail3 b/tests/fuzzFail/fuzz_file_fail3 new file mode 100644 index 000000000..607bce903 --- /dev/null +++ b/tests/fuzzFail/fuzz_file_fail3 @@ -0,0 +1 @@ +"\ÿ" diff --git a/tests/fuzzFail/fuzz_file_fail4 b/tests/fuzzFail/fuzz_file_fail4 new file mode 100644 index 000000000..e7aac1a29 --- /dev/null +++ b/tests/fuzzFail/fuzz_file_fail4 @@ -0,0 +1 @@ +""\"