Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix RST parsing after option lists #17442

Merged
merged 2 commits into from
Mar 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 72 additions & 1 deletion lib/packages/docutils/rst.nim
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@
## * ***triple emphasis*** (bold and italic) using \*\*\*
## * ``:idx:`` role for \`interpreted text\` to include the link to this
## text into an index (example: `Nim index`_).
## * double slash `//` in option lists serves as a prefix for any option that
a-mr marked this conversation as resolved.
Show resolved Hide resolved
## starts with a word (without any leading symbols like `-`, `--`, `/`)::
##
## //compile compile the project
## //doc generate documentation
##
## Here the dummy `//` will disappear, while options ``compile``
## and ``doc`` will be left in the final document.
##
## .. [cmp:Sphinx] similar but different from the directives of
## Python `Sphinx directives`_ extensions
Expand Down Expand Up @@ -548,6 +556,67 @@ proc pushInd(p: var RstParser, ind: int) =
proc popInd(p: var RstParser) =
  ## Removes the innermost entry of `p.indentStack`; the stack is left
  ## untouched when only a single entry remains, so it never becomes empty.
  let depth = p.indentStack.len
  if depth > 1:
    p.indentStack.setLen(depth - 1)

# Working with indentation in rst.nim
# -----------------------------------
#
# Every line break has an associated tkIndent.
# The tokenizer writes back the first column of the next non-blank line
# into the `ival` field of all preceding tkIndent tokens.
#
# An RST document is divided into body elements (B.E.), each of which
# has a dedicated handler proc (or block of logic when the B.E. is a block quote)
# that should follow this rule:
# Every B.E. handler proc should finish at tkIndent (newline)
# after its B.E. finishes.
# Then its caller (which is `parseSection` or another B.E. handler)
# checks the tkIndent ival (without needing to advance `p.idx`)
# and decides for itself whether to continue processing or also stop.
#
# An example::
#
# L RST text fragment indentation
# +--------------------+
# 1 | | <- (empty line at the start of file) no tokens
# 2 |First paragraph. | <- tkIndent has ival=0, and next tkWord has col=0
# 3 | | <- tkIndent has ival=0
# 4 |* bullet item and | <- tkIndent has ival=0, and next tkPunct has col=0
# 5 | its continuation | <- tkIndent has ival=2, and next tkWord has col=2
# 6 | | <- tkIndent has ival=4
# 7 | Block quote | <- tkIndent has ival=4, and next tkWord has col=4
# 8 | | <- tkIndent has ival=0
# 9 | | <- tkIndent has ival=0
# 10|Final paragraph | <- tkIndent has ival=0, and tkWord has col=0
# +--------------------+
# C:01234
#
# Here parser starts with initial `indentStack=[0]` and then calls the
# 1st `parseSection`:
#
# - `parseSection` calls `parseParagraph` and "First paragraph" is parsed
# - bullet list handler is started at reaching ``*`` (L4 C0), it
# starts bullet item logic (L4 C2), which calls `pushInd(p, ind=2)`,
# then calls `parseSection` (2nd call, nested) which parses
# paragraph "bullet item and its continuation" and then starts
# the block quote logic (L7 C4).
# The block quote logic calls `pushInd(p, ind=4)` and
# calls `parseSection` again, so a (simplified) sequence of calls now is::
#
# parseSection -> parseBulletList ->
# parseSection (+block quote logic) -> parseSection
#
# 3rd `parseSection` finishes, block quote logic calls `popInd(p)`,
# it returns to bullet item logic, which sees that next tkIndent has
# ival=0 and stops there since the required indentation for a bullet item
# is 2 and 0<2; the bullet item logic calls `popInd(p)`.
# Then bullet list handler checks that next tkWord (L10 C0) has the
# right indentation but does not have ``*`` so stops at tkIndent (L10).
# - 1st `parseSection` invocation calls `parseParagraph` and the
# "Final paragraph" is parsed.
#
# If a B.E. handler has advanced `p.idx` past tkIndent to check
# whether it should continue its processing or not, and decided not to,
# then this B.E. handler should step back (e.g. do `dec p.idx`).

proc initParser(p: var RstParser, sharedState: PSharedState) =
p.indentStack = @[0]
p.tok = @[]
Expand Down Expand Up @@ -1901,8 +1970,9 @@ proc parseBulletList(p: var RstParser): PRstNode =

proc parseOptionList(p: var RstParser): PRstNode =
result = newRstNodeA(p, rnOptionList)
let col = currentTok(p).col
while true:
if isOptionList(p):
if currentTok(p).col == col and isOptionList(p):
var a = newRstNode(rnOptionGroup)
var b = newRstNode(rnDescription)
var c = newRstNode(rnOptionListItem)
Expand All @@ -1925,6 +1995,7 @@ proc parseOptionList(p: var RstParser): PRstNode =
c.add(b)
result.add(c)
else:
dec p.idx # back to tkIndent
break

proc parseDefinitionList(p: var RstParser): PRstNode =
Expand Down
49 changes: 49 additions & 0 deletions tests/stdlib/trstgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1259,6 +1259,55 @@ Test1
let refline = "Ref. " & ref1 & "! and " & ref2 & ";and " & ref3 & "."
doAssert refline in output1

test "Option lists 1":
# check that "* b" is not consumed by previous bullet item because of
# incorrect indentation handling in option lists
let input = dedent """
* a
-m desc
-n very long
desc
* b"""
let output = input.toHtml
check(output.count("<ul") == 1)
check(output.count("<li>") == 2)
check(output.count("<table") == 1)
check("""<th align="left">-m</th><td align="left">desc</td>""" in output)
check("""<th align="left">-n</th><td align="left">very long desc</td>""" in
output)

test "Option lists 2":
# check that 2nd option list is not united with the 1st
let input = dedent """
* a
-m desc
-n very long
desc
-d option"""
let output = input.toHtml
check(output.count("<ul") == 1)
check(output.count("<table") == 2)
check("""<th align="left">-m</th><td align="left">desc</td>""" in output)
timotheecour marked this conversation as resolved.
Show resolved Hide resolved
check("""<th align="left">-n</th><td align="left">very long desc</td>""" in
output)
check("""<th align="left">-d</th><td align="left">option</td>""" in
output)

test "Option list 3 (double /)":
let input = dedent """
* a
//compile compile1
//doc doc1
cont
-d option"""
let output = input.toHtml
check(output.count("<ul") == 1)
check(output.count("<table") == 2)
check("""<th align="left">compile</th><td align="left">compile1</td>""" in output)
check("""<th align="left">doc</th><td align="left">doc1 cont</td>""" in
output)
check("""<th align="left">-d</th><td align="left">option</td>""" in
output)
suite "RST/Code highlight":
test "Basic Python code highlight":
let pythonCode = """
Expand Down