diff --git a/utils/grass_html2md.sh b/utils/grass_html2md.sh
index 9ac2e548ae1..af51d8b37ee 100755
--- a/utils/grass_html2md.sh
+++ b/utils/grass_html2md.sh
@@ -9,14 +9,14 @@ set -eu
# wget
#
# Author(s):
-# Martin Landa, Markus Neteler
+# Martin Landa, Markus Neteler, Corey White
#
# Usage:
# If you have "pandoc" in PATH, execute for HTML file conversion in
# current directory and subdirectories:
# ./utils/grass_html2md.sh
#
-# COPYRIGHT: (C) 2024 by the GRASS Development Team
+# COPYRIGHT: (C) 2024-2025 by the GRASS Development Team
#
# This program is free software under the GNU General Public
# License (>=v2). Read the file COPYING that comes with GRASS
@@ -43,6 +43,22 @@ trap "exitprocedure" 2 3 15
# path to LUA file (./utils/pandoc_codeblock.lua)
UTILSPATH="utils"
+process_file() { # Convert one preprocessed HTML file (arg 1) to Markdown via pandoc, then delete that input file
+ local file="$1" # temporary, preprocessed HTML file: read as pipeline input and removed at the end
+ local f="$2" # original HTML file path: only used to derive the output name (the .html suffix replaced by .md)
+ # NOTE(review): the first sed expression below is split over two lines and its search text is empty in this view - it looks lost in extraction, confirm against the real script. After pandoc, the two trailing sed calls turn a space followed by backslash-dollar into space-dollar, and replace %20 with a dash.
+ cat "$file" | \
+ sed 's#
##g' | \
+ pandoc -f html-native_divs \
+ -t gfm+pipe_tables+gfm_auto_identifiers --wrap=auto \
+ --lua-filter "${UTILSPATH}/pandoc_codeblock.lua" | \
+ sed 's+ \\\$+ \$+g' | sed 's+%20+-+g' > "${f%%.html}.md"
+
+ rm -f "$file" # drop the temporary input after the conversion pipeline; -f keeps this quiet if it is already gone
+
+}
+
# run recursively: HTML to MD
for f in $(find . -name *.html); do
echo "${f}"
@@ -57,13 +73,6 @@ for f in $(find . -name *.html); do
s|_KEEPHTML||g;
' "${f%%.html}.html" > "${f%%.html}_tmp.html"
- cat "${f%%.html}_tmp.html" | \
- sed 's###g' | \
- pandoc --from=html --to=markdown -t gfm \
- --lua-filter "${UTILSPATH}/pandoc_codeblock.lua" | \
- sed 's+ \\\$+ \$+g' | sed 's+%20+-+g' > "${f%%.html}.md"
-
- rm -f "${f%%.html}_tmp.html"
+ process_file "${f%%.html}_tmp.html" ${f%%.html}.html
done
diff --git a/utils/pandoc_codeblock.lua b/utils/pandoc_codeblock.lua
index e2a0a54910f..4e45faa6147 100644
--- a/utils/pandoc_codeblock.lua
+++ b/utils/pandoc_codeblock.lua
@@ -2,7 +2,71 @@
-- Test cases
-- raster/r.sun/r.sun.html
--- Function to convert code blocks to markdown
-function CodeBlock (cb)
- return pandoc.RawBlock('markdown', '```shell\n' .. cb.text .. '\n```\n')
+-- Enforces markdownlint rules during Pandoc conversion
+local MAX_LINE_LENGTH = 120 -- Adjust as needed for MD013. NOTE(review): not referenced by any function visible in this patch - confirm it is still needed
+
+local LIST_INDENT = "" -- NOTE(review): empty string that no function visible in this patch reads - confirm it is still needed
+
+--- Rebuild an image as a plain Markdown image that always carries alt text.
+-- Pandoc keeps an image's alternative text in its `caption` field (a list of
+-- inlines); Image elements have no `alt` field, so reading `el.alt` always
+-- produced the placeholder and threw the real alt text away.
+-- @param el pandoc Image element
+-- @return a new pandoc Image built from caption and source only (title and
+--         attributes are not carried over, same as before)
+function Image(el)
+    local caption = el.caption
+    if caption == nil or #caption == 0 then
+        -- The source image had no alt text: fall back to a placeholder so the
+        -- generated Markdown never contains an image without alt text.
+        caption = {pandoc.Str("image-alt")}
+    end
+    return pandoc.Image(caption, el.src)
+end
+
+-- Fixes some edge cases with raw HTML elements: selected raw HTML inlines are replaced by raw Markdown, anything else is returned unchanged
+function RawInline(el) -- el: raw inline element; only its format and text fields are read
+ if el.format == "html" then -- raw inlines in any other format fall through to the final return
+ if el.text:match("") then -- NOTE(review): the pattern is an empty string in this view, which matches any text and makes every later branch unreachable; the literal was presumably an HTML tag stripped during extraction - confirm against the real file
+ return pandoc.RawInline("markdown", "*") -- emit a Markdown emphasis marker
+ elseif el.text:match("") then -- NOTE(review): empty pattern here as well - confirm original literal
+ return pandoc.RawInline("markdown", "*")
+ elseif el.text:match("") then -- NOTE(review): empty pattern here as well - confirm original literal
+ return pandoc.RawInline("markdown", "*")
+ elseif el.text:match("") then -- NOTE(review): empty pattern here as well - confirm original literal
+ return pandoc.RawInline("markdown", "*")
+ elseif el.text:match(" ") then -- NOTE(review): shows as a single blank here; possibly an HTML entity that was decoded - confirm
+ return pandoc.RawInline("markdown", " ") -- replaced by a plain space
+ elseif el.text:match("<") then -- NOTE(review): possibly an HTML entity that was decoded - confirm
+ return pandoc.RawInline("markdown", "<")
+ elseif el.text:match(">") then -- NOTE(review): possibly an HTML entity that was decoded - confirm
+ return pandoc.RawInline("markdown", ">")
+ end
+ end
+ return el -- no branch matched (or not HTML): keep the element as it is
+end
+
+--- Emit a code block as a backtick-fenced raw Markdown block.
+-- The info string is the block's first class, or "sh" when it has none.
+-- The fence is at least three backticks and always longer than the longest
+-- backtick run inside the code, so code that itself contains a fence cannot
+-- close the block early.
+-- @param el pandoc CodeBlock element
+-- @return pandoc RawBlock in "markdown" format
+function CodeBlock(el)
+    local lang = el.classes[1] or "sh" -- Preserve language if available
+    -- Start at 2 so that the minimum fence length below is 3.
+    local longest_run = 2
+    for run in el.text:gmatch("`+") do
+        if #run > longest_run then
+            longest_run = #run
+        end
+    end
+    local fence = string.rep("`", longest_run + 1)
+    return pandoc.RawBlock("markdown", fence .. lang .. "\n" .. el.text .. "\n" .. fence)
+end
+
+--- Rebuild a header from its level and inline content only.
+-- No attributes are passed to the constructor, so the new header carries
+-- none of the original's; the original note says this is meant to ensure
+-- ATX-style headers.
+-- @param el pandoc Header element
+-- @return a new pandoc Header
+function Header(el)
+    local level, inlines = el.level, el.content
+    return pandoc.Header(level, inlines)
+end
+
+--- Strip whitespace from the end of a Str element's text.
+-- @param el pandoc Str element
+-- @return a new pandoc Str holding the trimmed text
+function Str(el)
+    -- string.gsub also returns a replacement count; the outer parentheses
+    -- keep only the resulting string.
+    local trimmed = (string.gsub(el.text, "%s+$", ""))
+    return pandoc.Str(trimmed)
+end
+
+--- Collapse runs of consecutive empty paragraphs into a single one.
+-- A block counts as blank when it is a Para with no inline content. The
+-- first blank of a run is kept, the following ones are dropped; all other
+-- blocks pass through in order. The document metadata is handed on
+-- unchanged (building the document from the blocks alone would discard it).
+-- @param doc pandoc document
+-- @return a new pandoc document with the filtered blocks and the same metadata
+function Pandoc(doc)
+    local kept = {}
+    local previous_blank = false
+
+    for _, block in ipairs(doc.blocks) do
+        local is_blank = block.t == "Para" and #block.content == 0
+        -- Only a blank that directly follows another blank is skipped.
+        if not (is_blank and previous_blank) then
+            kept[#kept + 1] = block
+        end
+        previous_blank = is_blank
+    end
+
+    return pandoc.Pandoc(kept, doc.meta)
end