From d997d4078deb63ccc9a7a05d273617d63d7b3fc1 Mon Sep 17 00:00:00 2001 From: Evgeny Gagauz Date: Mon, 1 Aug 2016 23:37:00 +0300 Subject: [PATCH 1/3] Empty lines do not break options sections. There are several reasons why an options section can be split by an empty line, among them: - logical grouping; - aesthetic reasons. Such style is used, for example, in the 'man' program. An example: Options: --before-empty-lines An option before empty lines. --after-empty-lines An option after empty lines. --- docopt.cpp | 4 ++-- testcases.docopt | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/docopt.cpp b/docopt.cpp index e875d2f..67d222d 100644 --- a/docopt.cpp +++ b/docopt.cpp @@ -168,8 +168,8 @@ static std::vector parse_section(std::string const& name, std::stri std::regex const re_section_pattern { "(?:^|\\n)" // anchored at a linebreak (or start of string) "(" - "[^\\n]*" + name + "[^\\n]*(?=\\n?)" // a line that contains the name - "(?:\\n[ \\t].*?(?=\\n|$))*" // followed by any number of lines that are indented + "[^\\n]*" + name + "[^\\n]*(?=\\n?)" // a line that contains the section name + "(?:\\n+[ \\t].*?(?=\\n|$))*" // followed by any number of indented or empty lines ")", std::regex::icase }; diff --git a/testcases.docopt b/testcases.docopt index efe9a07..3954d33 100644 --- a/testcases.docopt +++ b/testcases.docopt @@ -955,3 +955,21 @@ other options: """ $ prog --baz --egg {"--foo": false, "--baz": true, "--bar": false, "--egg": true, "--spam": false} + + +# An empty line must not break an options section. +r""" +Usage: prog [options] + +Options: + --before-empty-lines An option before empty lines. + + + --after-empty-lines An option after empty lines. 
+""" + +$ prog --before-empty-lines +{"--before-empty-lines": true, "--after-empty-lines": false} + +$ prog --after-empty-line +{"--before-empty-lines": false, "--after-empty-lines": true} From fb64b5c7c712a14e481eca2a3ce5b0b9fab71084 Mon Sep 17 00:00:00 2001 From: Evgeny Gagauz Date: Fri, 5 Aug 2016 00:48:14 +0300 Subject: [PATCH 2/3] Regex in parse_section() is aligned with proposal for docopt. The proposed changes for docopt can be found in the pull request [339](https://github.com/docopt/docopt/pull/339). --- docopt.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/docopt.cpp b/docopt.cpp index 67d222d..554bcc2 100644 --- a/docopt.cpp +++ b/docopt.cpp @@ -161,16 +161,20 @@ std::vector flat_filter(Pattern& pattern) { } static std::vector parse_section(std::string const& name, std::string const& source) { + // There is no a multiline strings concept in std::regex, therefore the symbols `^` and `$` match + // only once at the start and at the end of a string, even if this string contains new line + // characters. For this reason, following constructions are used instead: + // (?:^|\\n) - start of a line; + // (?=\\n|$) - end of a line. // ECMAScript regex only has "?=" for a non-matching lookahead. In order to make sure we always have // a newline to anchor our matching, we have to avoid matching the final newline of each grouping. - // Therefore, our regex is adjusted from the docopt Python one to use ?= to match the newlines before - // the following lines, rather than after. 
std::regex const re_section_pattern { - "(?:^|\\n)" // anchored at a linebreak (or start of string) - "(" - "[^\\n]*" + name + "[^\\n]*(?=\\n?)" // a line that contains the section name - "(?:\\n+[ \\t].*?(?=\\n|$))*" // followed by any number of indented or empty lines - ")", + "(?:^|\\n)(" // A section begins at start of a line and consists of: + ".*" + name + ".*" // - a line that contains the section's name; and + "(?:" // - several + "\\n+[ \\t].*" // indented lines possibly separated by empty lines. + ")*" + ")(?=\\n|$)", // The section ends at the end of a line. std::regex::icase }; From 1c4b042c4ac0e37696ca608768cab94d21ea2e8f Mon Sep 17 00:00:00 2001 From: Evgeny Gagauz Date: Fri, 5 Aug 2016 02:31:01 +0300 Subject: [PATCH 3/3] Wildcards are replaced with `[^\\n]` as a workaround for Boost.Regex. The wildcard `.` matches any single character including the newline character in Boost.Regex. So, the `[^\\n]` construct is used instead. --- docopt.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/docopt.cpp b/docopt.cpp index 554bcc2..fcdc07e 100644 --- a/docopt.cpp +++ b/docopt.cpp @@ -166,15 +166,19 @@ static std::vector parse_section(std::string const& name, std::stri // characters. For this reason, following constructions are used instead: // (?:^|\\n) - start of a line; // (?=\\n|$) - end of a line. + // // ECMAScript regex only has "?=" for a non-matching lookahead. In order to make sure we always have // a newline to anchor our matching, we have to avoid matching the final newline of each grouping. + // + // The wildcard `.` matches any single character including the newline character in Boost.Regex. So, + // `[^\\n]` construction is used instead. std::regex const re_section_pattern { - "(?:^|\\n)(" // A section begins at start of a line and consists of: - ".*" + name + ".*" // - a line that contains the section's name; and - "(?:" // - several - "\\n+[ \\t].*" // indented lines possibly separated by empty lines. 
+ "(?:^|\\n)(" // A section begins at start of a line and consists of: + "[^\\n]*" + name + "[^\\n]*" // - a line that contains the section's name; and + "(?:" // - several + "\\n+[ \\t][^\\n]*" // indented lines possibly separated by empty lines. ")*" - ")(?=\\n|$)", // The section ends at the end of a line. + ")(?=\\n|$)", // The section ends at the end of a line. std::regex::icase };