Skip to content

Commit 91220ba

Browse files
regular and literal strings (#964)
Add escaping to quoted strings and differentiate between literal and regular strings. The goal is to make string processing in config files as close as possible to the TOML standard. This means handling escape sequences, including Unicode, and differentiating between literal strings and regular strings both in files and when splitting the command line. It also allows variable names in the files to be quoted. This PR gets partway there. It removes some hacks from the previous PR that dealt with unusual option names and replaces them with quoted names. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent ba833f0 commit 91220ba

20 files changed

+466
-153
lines changed

include/CLI/Config.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ namespace detail {
2626

2727
std::string convert_arg_for_ini(const std::string &arg,
2828
char stringQuote = '"',
29-
char characterQuote = '\'',
29+
char literalQuote = '\'',
3030
bool disable_multi_line = false);
3131

3232
/// Comma separated join, adds quotes if needed
@@ -35,7 +35,7 @@ std::string ini_join(const std::vector<std::string> &args,
3535
char arrayStart = '[',
3636
char arrayEnd = ']',
3737
char stringQuote = '"',
38-
char characterQuote = '\'');
38+
char literalQuote = '\'');
3939

4040
std::vector<std::string> generate_parents(const std::string &section, std::string &name, char parentSeparator);
4141

include/CLI/ConfigFwd.hpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ class ConfigBase : public Config {
9292
char valueDelimiter = '=';
9393
/// the character to use around strings
9494
char stringQuote = '"';
95-
/// the character to use around single characters
96-
char characterQuote = '\'';
95+
/// the character to use around single characters and literal strings
96+
char literalQuote = '\'';
9797
/// the maximum number of layers to allow
9898
uint8_t maximumLayers{255};
9999
/// the separator used to separate parent layers
@@ -132,7 +132,7 @@ class ConfigBase : public Config {
132132
/// Specify the quote characters used around strings and characters
133133
ConfigBase *quoteCharacter(char qString, char qChar) {
134134
stringQuote = qString;
135-
characterQuote = qChar;
135+
literalQuote = qChar;
136136
return this;
137137
}
138138
/// Specify the maximum number of parents

include/CLI/StringTools.hpp

+11-1
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ inline std::string trim_copy(const std::string &str) {
120120
/// remove quotes at the front and back of a string either '"' or '\''
121121
CLI11_INLINE std::string &remove_quotes(std::string &str);
122122

123+
/// remove quotes from all elements of a string vector and process escaped components
124+
CLI11_INLINE void remove_quotes(std::vector<std::string> &args);
125+
123126
/// Add a leader to the beginning of all new lines (nothing is added
124127
/// at the start of the first line). `"; "` would be for ini files
125128
///
@@ -212,9 +215,13 @@ template <typename Callable> inline std::string find_and_modify(std::string str,
212215
return str;
213216
}
214217

218+
/// close a sequence of characters indicated by a closure character. Brackets allow subsequences
219+
/// recognized bracket sequences include "'`[(<{ other closure characters are assumed to be literal strings
220+
CLI11_INLINE std::size_t close_sequence(const std::string &str, std::size_t start, char closure_char);
221+
215222
/// Split a string '"one two" "three"' into 'one two', 'three'
216223
/// Quote characters can be ` ' or " or bracket characters [{(< with matching to the matching bracket
217-
CLI11_INLINE std::vector<std::string> split_up(std::string str, char delimiter = '\0', bool removeQuotes = true);
224+
CLI11_INLINE std::vector<std::string> split_up(std::string str, char delimiter = '\0');
218225

219226
/// get the value of an environmental variable or empty string if empty
220227
CLI11_INLINE std::string get_environment_value(const std::string &env_name);
@@ -246,6 +253,9 @@ CLI11_INLINE bool is_binary_escaped_string(const std::string &escaped_string);
246253
/// extract an escaped binary_string
247254
CLI11_INLINE std::string extract_binary_string(const std::string &escaped_string);
248255

256+
/// process a quoted string, remove the quotes and if appropriate handle escaped characters
257+
CLI11_INLINE bool process_quoted_string(std::string &str, char string_char = '\"', char literal_char = '\'');
258+
249259
} // namespace detail
250260

251261
// [CLI11:string_tools_hpp:end]

include/CLI/impl/App_inl.hpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,11 @@ CLI11_INLINE void App::parse(std::string commandline, bool program_name_included
579579
auto args = detail::split_up(std::move(commandline));
580580
// remove all empty strings
581581
args.erase(std::remove(args.begin(), args.end(), std::string{}), args.end());
582+
try {
583+
detail::remove_quotes(args);
584+
} catch(const std::invalid_argument &arg) {
585+
throw CLI::ParseError(arg.what(), CLI::ExitCodes::InvalidError);
586+
}
582587
std::reverse(args.begin(), args.end());
583588
parse(std::move(args));
584589
}
@@ -1569,7 +1574,7 @@ CLI11_INLINE bool App::_parse_single(std::vector<std::string> &args, bool &posit
15691574
case detail::Classifier::SHORT:
15701575
case detail::Classifier::WINDOWS_STYLE:
15711576
// If already parsed a subcommand, don't accept options_
1572-
_parse_arg(args, classifier, false);
1577+
retval = _parse_arg(args, classifier, false);
15731578
break;
15741579
case detail::Classifier::NONE:
15751580
// Probably a positional or something for a parent (sub)command

include/CLI/impl/Config_inl.hpp

+64-55
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,19 @@
1919
namespace CLI {
2020
// [CLI11:config_inl_hpp:verbatim]
2121

22-
static constexpr auto triple_quote = R"(""")";
22+
static constexpr auto multiline_literal_quote = R"(''')";
23+
static constexpr auto multiline_string_quote = R"(""")";
2324

2425
namespace detail {
2526

2627
CLI11_INLINE bool is_printable(const std::string &test_string) {
2728
return std::all_of(test_string.begin(), test_string.end(), [](char x) {
28-
return (isprint(static_cast<unsigned char>(x)) != 0 || x == '\n');
29+
return (isprint(static_cast<unsigned char>(x)) != 0 || x == '\n' || x == '\t');
2930
});
3031
}
3132

3233
CLI11_INLINE std::string
33-
convert_arg_for_ini(const std::string &arg, char stringQuote, char characterQuote, bool disable_multi_line) {
34+
convert_arg_for_ini(const std::string &arg, char stringQuote, char literalQuote, bool disable_multi_line) {
3435
if(arg.empty()) {
3536
return std::string(2, stringQuote);
3637
}
@@ -53,13 +54,10 @@ convert_arg_for_ini(const std::string &arg, char stringQuote, char characterQuot
5354
if(isprint(static_cast<unsigned char>(arg.front())) == 0) {
5455
return binary_escape_string(arg);
5556
}
56-
if(arg == "\\") {
57-
return std::string(1, stringQuote) + "\\\\" + stringQuote;
58-
}
5957
if(arg == "'") {
6058
return std::string(1, stringQuote) + "'" + stringQuote;
6159
}
62-
return std::string(1, characterQuote) + arg + characterQuote;
60+
return std::string(1, literalQuote) + arg + literalQuote;
6361
}
6462
// handle hex, binary or octal arguments
6563
if(arg.front() == '0') {
@@ -82,13 +80,10 @@ convert_arg_for_ini(const std::string &arg, char stringQuote, char characterQuot
8280
if(!is_printable(arg)) {
8381
return binary_escape_string(arg);
8482
}
85-
if(arg.find_first_of('\n') != std::string::npos) {
86-
if(disable_multi_line) {
87-
return binary_escape_string(arg);
88-
}
89-
return std::string(triple_quote) + arg + triple_quote;
90-
}
9183
if(detail::has_escapable_character(arg)) {
84+
if(arg.size() > 100 && !disable_multi_line) {
85+
return std::string(multiline_literal_quote) + arg + multiline_literal_quote;
86+
}
9287
return std::string(1, stringQuote) + detail::add_escaped_characters(arg) + stringQuote;
9388
}
9489
return std::string(1, stringQuote) + arg + stringQuote;
@@ -99,7 +94,7 @@ CLI11_INLINE std::string ini_join(const std::vector<std::string> &args,
9994
char arrayStart,
10095
char arrayEnd,
10196
char stringQuote,
102-
char characterQuote) {
97+
char literalQuote) {
10398
bool disable_multi_line{false};
10499
std::string joined;
105100
if(args.size() > 1 && arrayStart != '\0') {
@@ -114,7 +109,7 @@ CLI11_INLINE std::string ini_join(const std::vector<std::string> &args,
114109
joined.push_back(' ');
115110
}
116111
}
117-
joined.append(convert_arg_for_ini(arg, stringQuote, characterQuote, disable_multi_line));
112+
joined.append(convert_arg_for_ini(arg, stringQuote, literalQuote, disable_multi_line));
118113
}
119114
if(args.size() > 1 && arrayEnd != '\0') {
120115
joined.push_back(arrayEnd);
@@ -233,7 +228,7 @@ inline std::vector<ConfigItem> ConfigBase::from_config(std::istream &input) cons
233228
if(len < 3) {
234229
continue;
235230
}
236-
if(line.compare(0, 3, triple_quote) == 0 || line.compare(0, 3, "'''") == 0) {
231+
if(line.compare(0, 3, multiline_string_quote) == 0 || line.compare(0, 3, multiline_literal_quote) == 0) {
237232
inMLineComment = true;
238233
auto cchar = line.front();
239234
while(inMLineComment) {
@@ -277,29 +272,26 @@ inline std::vector<ConfigItem> ConfigBase::from_config(std::istream &input) cons
277272

278273
// comment lines
279274
if(line.front() == ';' || line.front() == '#' || line.front() == commentChar) {
280-
if(line.compare(2, 13, "cli11:literal") == 0) {
281-
literalName = true;
282-
getline(input, buffer);
283-
line = detail::trim_copy(buffer);
284-
} else {
285-
continue;
286-
}
275+
continue;
276+
}
277+
std::size_t search_start = 0;
278+
if(line.front() == stringQuote || line.front() == literalQuote || line.front() == '`') {
279+
search_start = detail::close_sequence(line, 0, line.front());
287280
}
288-
289281
// Find = in string, split and recombine
290-
auto delimiter_pos = line.find_first_of(valueDelimiter, 1);
291-
auto comment_pos = (literalName) ? std::string::npos : line.find_first_of(commentChar);
292-
282+
auto delimiter_pos = line.find_first_of(valueDelimiter, search_start + 1);
283+
auto comment_pos = line.find_first_of(commentChar, search_start);
293284
if(comment_pos < delimiter_pos) {
294285
delimiter_pos = std::string::npos;
295286
}
296287
if(delimiter_pos != std::string::npos) {
297288

298289
name = detail::trim_copy(line.substr(0, delimiter_pos));
299290
std::string item = detail::trim_copy(line.substr(delimiter_pos + 1, std::string::npos));
300-
bool mlquote = (item.compare(0, 3, "'''") == 0 || item.compare(0, 3, triple_quote) == 0);
291+
bool mlquote =
292+
(item.compare(0, 3, multiline_literal_quote) == 0 || item.compare(0, 3, multiline_string_quote) == 0);
301293
if(!mlquote && comment_pos != std::string::npos && !literalName) {
302-
auto citems = detail::split_up(item, commentChar, false);
294+
auto citems = detail::split_up(item, commentChar);
303295
item = detail::trim_copy(citems.front());
304296
}
305297
if(mlquote) {
@@ -337,6 +329,9 @@ inline std::vector<ConfigItem> ConfigBase::from_config(std::istream &input) cons
337329
if(!item.empty() && item.back() == '\n') {
338330
item.pop_back();
339331
}
332+
if(keyChar == '\"') {
333+
item = detail::remove_escaped_characters(item);
334+
}
340335
} else {
341336
if(lineExtension) {
342337
detail::trim(l2);
@@ -358,29 +353,27 @@ inline std::vector<ConfigItem> ConfigBase::from_config(std::istream &input) cons
358353
detail::trim(multiline);
359354
item += multiline;
360355
}
361-
items_buffer = detail::split_up(item.substr(1, item.length() - 2), aSep, false);
356+
items_buffer = detail::split_up(item.substr(1, item.length() - 2), aSep);
362357
} else if((isDefaultArray || isINIArray) && item.find_first_of(aSep) != std::string::npos) {
363-
items_buffer = detail::split_up(item, aSep, false);
358+
items_buffer = detail::split_up(item, aSep);
364359
} else if((isDefaultArray || isINIArray) && item.find_first_of(' ') != std::string::npos) {
365-
items_buffer = detail::split_up(item, '\0', false);
360+
items_buffer = detail::split_up(item, '\0');
366361
} else {
367362
items_buffer = {item};
368363
}
369364
} else {
370365
name = detail::trim_copy(line.substr(0, comment_pos));
371366
items_buffer = {"true"};
372367
}
373-
if(name.find(parentSeparatorChar) == std::string::npos) {
374-
if(!literalName) {
375-
detail::remove_quotes(name);
376-
}
377-
}
378-
// clean up quotes on the items and check for escaped strings
379-
for(auto &it : items_buffer) {
380-
detail::remove_quotes(it);
381-
if(detail::is_binary_escaped_string(it)) {
382-
it = detail::extract_binary_string(it);
368+
try {
369+
literalName = detail::process_quoted_string(name, stringQuote, literalQuote);
370+
371+
// clean up quotes on the items and check for escaped strings
372+
for(auto &it : items_buffer) {
373+
detail::process_quoted_string(it, stringQuote, literalQuote);
383374
}
375+
} catch(const std::invalid_argument &ia) {
376+
throw CLI::ParseError(ia.what(), CLI::ExitCodes::InvalidError);
384377
}
385378
std::vector<std::string> parents;
386379
if(literalName) {
@@ -461,16 +454,17 @@ ConfigBase::to_config(const App *app, bool default_also, bool write_description,
461454
continue;
462455
}
463456
}
464-
std::string name = prefix + opt->get_single_name();
465-
if(name == prefix) {
457+
std::string single_name = opt->get_single_name();
458+
if(single_name.empty()) {
466459
continue;
467460
}
461+
468462
std::string value = detail::ini_join(
469-
opt->reduced_results(), arraySeparator, arrayStart, arrayEnd, stringQuote, characterQuote);
463+
opt->reduced_results(), arraySeparator, arrayStart, arrayEnd, stringQuote, literalQuote);
470464

471465
if(value.empty() && default_also) {
472466
if(!opt->get_default_str().empty()) {
473-
value = detail::convert_arg_for_ini(opt->get_default_str(), stringQuote, characterQuote, false);
467+
value = detail::convert_arg_for_ini(opt->get_default_str(), stringQuote, literalQuote, false);
474468
} else if(opt->get_expected_min() == 0) {
475469
value = "false";
476470
} else if(opt->get_run_callback_for_default()) {
@@ -479,37 +473,52 @@ ConfigBase::to_config(const App *app, bool default_also, bool write_description,
479473
}
480474

481475
if(!value.empty()) {
476+
482477
if(!opt->get_fnames().empty()) {
483478
try {
484-
value = opt->get_flag_value(name, value);
479+
value = opt->get_flag_value(single_name, value);
485480
} catch(const CLI::ArgumentMismatch &) {
486481
bool valid{false};
487482
for(const auto &test_name : opt->get_fnames()) {
488483
try {
489484
value = opt->get_flag_value(test_name, value);
490-
name = test_name;
485+
single_name = test_name;
491486
valid = true;
492487
} catch(const CLI::ArgumentMismatch &) {
493488
continue;
494489
}
495490
}
496491
if(!valid) {
497492
value = detail::ini_join(
498-
opt->results(), arraySeparator, arrayStart, arrayEnd, stringQuote, characterQuote);
493+
opt->results(), arraySeparator, arrayStart, arrayEnd, stringQuote, literalQuote);
499494
}
500495
}
501496
}
502497
if(write_description && opt->has_description()) {
503498
out << '\n';
504499
out << commentLead << detail::fix_newlines(commentLead, opt->get_description()) << '\n';
505500
}
506-
if(name.find_first_of(commentTest) != std::string::npos || name.compare(0, 3, triple_quote) == 0 ||
507-
name.compare(0, 3, "'''") == 0 || (name.front() == '[' && name.back() == ']') ||
508-
(name.front() == stringQuote && name.back() == stringQuote) ||
509-
(name.front() == characterQuote && name.back() == characterQuote) ||
510-
(name.front() == '`' && name.back() == '`')) {
511-
out << commentChar << " cli11:literal\n";
501+
if(single_name.find_first_of(commentTest) != std::string::npos ||
502+
single_name.compare(0, 3, multiline_string_quote) == 0 ||
503+
single_name.compare(0, 3, multiline_literal_quote) == 0 ||
504+
(single_name.front() == '[' && single_name.back() == ']') ||
505+
(single_name.find_first_of(stringQuote) != std::string::npos) ||
506+
(single_name.find_first_of(literalQuote) != std::string::npos) ||
507+
(single_name.find_first_of('`') != std::string::npos)) {
508+
if(single_name.find_first_of(literalQuote) == std::string::npos) {
509+
single_name.insert(0, 1, literalQuote);
510+
single_name.push_back(literalQuote);
511+
} else {
512+
if(detail::has_escapable_character(single_name)) {
513+
single_name = detail::add_escaped_characters(single_name);
514+
}
515+
single_name.insert(0, 1, stringQuote);
516+
single_name.push_back(stringQuote);
517+
}
512518
}
519+
520+
std::string name = prefix + single_name;
521+
513522
out << name << valueDelimiter << value << '\n';
514523
}
515524
}

include/CLI/impl/Option_inl.hpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -609,7 +609,12 @@ CLI11_INLINE void Option::_reduce_results(results_t &out, const results_t &origi
609609
throw ArgumentMismatch::AtLeast(get_name(), static_cast<int>(num_min), original.size());
610610
}
611611
if(original.size() > num_max) {
612-
throw ArgumentMismatch::AtMost(get_name(), static_cast<int>(num_max), original.size());
612+
if(original.size() == 2 && num_max == 1 && original[1] == "%%" && original[0] == "{}") {
613+
// this condition is a trap for the following empty indicator check on config files
614+
out = original;
615+
} else {
616+
throw ArgumentMismatch::AtMost(get_name(), static_cast<int>(num_max), original.size());
617+
}
613618
}
614619
break;
615620
}

0 commit comments

Comments
 (0)