nim-lang · narimiran · Mar 25, 2021 · Mar 21, 2021 · Mar 24, 2021
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
@@ -82,6 +82,14 @@
 ## * ***triple emphasis*** (bold and italic) using \*\*\*
 ## * ``:idx:`` role for \`interpreted text\` to include the link to this
 ##   text into an index (example: `Nim index`_).
+## * double slash `//` in option lists serves as a prefix for any option that
+##   starts from a word (without any leading symbols like `-`, `--`, `/`)::
+##
+##     //compile   compile the project
+##     //doc       generate documentation
+##
+##   Here the dummy `//` will disappear, while options ``compile``
+##   and ``doc`` will be left in the final document.
 ##
 ## .. [cmp:Sphinx] similar but different from the directives of
 ##    Python `Sphinx directives`_ extensions
@@ -548,6 +556,67 @@ proc pushInd(p: var RstParser, ind: int) =
 proc popInd(p: var RstParser) =
   if p.indentStack.len > 1: setLen(p.indentStack, p.indentStack.len - 1)
 
+# Working with indentation in rst.nim
+# -----------------------------------
+#
+# Every line break has an associated tkIndent.
+# The tokenizer writes back the first column of next non-blank line
+# in all preceeding tkIndent tokens to the `ival` field of tkIndent.
+#
+# RST document is separated into body elements (B.E.), every of which
+# has a dedicated handler proc (or block of logic when B.E. is a block quote)
+# that should follow the next rule:
+#   Every B.E. handler proc should finish at tkIndent (newline)
+#   after its B.E. finishes.
+#   Then its callers (which is `parseSection` or another B.E. handler)
+#   check for tkIndent ival (without necessity to advance `p.idx`)
+#   and decide themselves whether they continue processing or also stop.
+#
+# An example::
+#
+#   L    RST text fragment                  indentation
+#     +--------------------+
+#   1 |                    | <- (empty line at the start of file) no tokens
+#   2 |First paragraph.    | <- tkIndent has ival=0, and next tkWord has col=0
+#   3 |                    | <- tkIndent has ival=0
+#   4 |* bullet item and   | <- tkIndent has ival=0, and next tkPunct has col=0
+#   5 |  its continuation  | <- tkIndent has ival=2, and next tkWord has col=2
+#   6 |                    | <- tkIndent has ival=4
+#   7 |    Block quote     | <- tkIndent has ival=4, and next tkWord has col=4
+#   8 |                    | <- tkIndent has ival=0
+#   9 |                    | <- tkIndent has ival=0
+#   10|Final paragraph     | <- tkIndent has ival=0, and tkWord has col=0
+#     +--------------------+
+#    C:01234
+#
+# Here parser starts with initial `indentStack=[0]` and then calls the
+# 1st `parseSection`:
+#
+#   - `parseSection` calls `parseParagraph` and "First paragraph" is parsed
+#   - bullet list handler is started at reaching ``*`` (L4 C0), it
+#     starts bullet item logic (L4 C2), which calls `pushInd(p, ind=2)`,
+#     then calls `parseSection` (2nd call, nested) which parses
+#     paragraph "bullet list and its continuation" and then starts
+#     a block quote logic (L7 C4).
+#     The block quote logic calls calls `pushInd(p, ind=4)` and
+#     calls `parseSection` again, so a (simplified) sequence of calls now is::
+#
+#       parseSection -> parseBulletList ->
+#         parseSection (+block quote logic) -> parseSection
+#
+#     3rd `parseSection` finishes, block quote logic calls `popInd(p)`,
+#     it returns to bullet item logic, which sees that next tkIndent has
+#     ival=0 and stops there since the required indentation for a bullet item
+#     is 2 and 0<2; the bullet item logic calls `popInd(p)`.
+#     Then bullet list handler checks that next tkWord (L10 C0) has the
+#     right indentation but does not have ``*`` so stops at tkIndent (L10).
+#   - 1st `parseSection` invocation calls `parseParagraph` and the
+#     "Final paragraph" is parsed.
+#
+# If a B.E. handler has advanced `p.idx` past tkIndent to check
+# whether it should continue its processing or not, and decided not to,
+# then this B.E. handler should step back (e.g. do `dec p.idx`).
+
 proc initParser(p: var RstParser, sharedState: PSharedState) =
   p.indentStack = @[0]
   p.tok = @[]
@@ -1901,8 +1970,9 @@ proc parseBulletList(p: var RstParser): PRstNode =
 
 proc parseOptionList(p: var RstParser): PRstNode =
   result = newRstNodeA(p, rnOptionList)
+  let col = currentTok(p).col
   while true:
-    if isOptionList(p):
+    if currentTok(p).col == col and isOptionList(p):
       var a = newRstNode(rnOptionGroup)
       var b = newRstNode(rnDescription)
       var c = newRstNode(rnOptionListItem)
@@ -1925,6 +1995,7 @@ proc parseOptionList(p: var RstParser): PRstNode =
       c.add(b)
       result.add(c)
     else:
+      dec p.idx  # back to tkIndent
       break
 
 proc parseDefinitionList(p: var RstParser): PRstNode =

diff --git a/tests/stdlib/trstgen.nim b/tests/stdlib/trstgen.nim
@@ -1259,6 +1259,55 @@ Test1
     let refline = "Ref. " & ref1 & "! and " & ref2 & ";and " & ref3 & "."
     doAssert refline in output1
 
+  test "Option lists 1":
+    # check that "* b" is not consumed by previous bullet item because of
+    # incorrect indentation handling in option lists
+    let input = dedent """
+      * a
+        -m   desc
+        -n   very long
+             desc
+      * b"""
+    let output = input.toHtml
+    check(output.count("<ul") == 1)
+    check(output.count("<li>") == 2)
+    check(output.count("<table") == 1)
+    check("""<th align="left">-m</th><td align="left">desc</td>""" in output)
+    check("""<th align="left">-n</th><td align="left">very long desc</td>""" in
+          output)
+
+  test "Option lists 2":
+    # check that 2nd option list is not united with the 1st
+    let input = dedent """
+      * a
+        -m   desc
+        -n   very long
+             desc
+      -d  option"""
+    let output = input.toHtml
+    check(output.count("<ul") == 1)
+    check(output.count("<table") == 2)
+    check("""<th align="left">-m</th><td align="left">desc</td>""" in output)
+    check("""<th align="left">-n</th><td align="left">very long desc</td>""" in
+          output)
+    check("""<th align="left">-d</th><td align="left">option</td>""" in
+          output)
+
+  test "Option list 3 (double /)":
+    let input = dedent """
+      * a
+        //compile  compile1
+        //doc      doc1
+                   cont
+      -d  option"""
+    let output = input.toHtml
+    check(output.count("<ul") == 1)
+    check(output.count("<table") == 2)
+    check("""<th align="left">compile</th><td align="left">compile1</td>""" in output)
+    check("""<th align="left">doc</th><td align="left">doc1 cont</td>""" in
+          output)
+    check("""<th align="left">-d</th><td align="left">option</td>""" in
+          output)
 suite "RST/Code highlight":
   test "Basic Python code highlight":
     let pythonCode = """