vkbo
diff --git a/‎.gitignore
+3 b/‎.gitignore
+3
diff --git a/‎docs/source/index.rst
+9-2 b/‎docs/source/index.rst
+9-2
diff --git a/‎docs/source/more_counting.rst
+98 b/‎docs/source/more_counting.rst
+98
diff --git a/‎docs/source/usage_projectformat.rst renamed to ‎docs/source/more_projectformat.rst b/‎docs/source/usage_projectformat.rst renamed to ‎docs/source/more_projectformat.rst
diff --git a/‎novelwriter/assets/icons/typicons_dark/icons.conf
+2 b/‎novelwriter/assets/icons/typicons_dark/icons.conf
+2
diff --git a/‎novelwriter/assets/icons/typicons_dark/typ_arrow-down.svg
+4 b/‎novelwriter/assets/icons/typicons_dark/typ_arrow-down.svg
+4
diff --git a/‎novelwriter/assets/icons/typicons_dark/typ_arrow-right.svg
+4 b/‎novelwriter/assets/icons/typicons_dark/typ_arrow-right.svg
+4
diff --git a/‎novelwriter/assets/icons/typicons_light/icons.conf
+2 b/‎novelwriter/assets/icons/typicons_light/icons.conf
+2
diff --git a/‎novelwriter/assets/icons/typicons_light/typ_arrow-down.svg
+4 b/‎novelwriter/assets/icons/typicons_light/typ_arrow-down.svg
+4
diff --git a/‎novelwriter/assets/icons/typicons_light/typ_arrow-right.svg
+4 b/‎novelwriter/assets/icons/typicons_light/typ_arrow-right.svg
+4
diff --git a/‎novelwriter/constants.py
-5 b/‎novelwriter/constants.py
-5
diff --git a/‎novelwriter/core/docbuild.py
+7-2 b/‎novelwriter/core/docbuild.py
+7-2
diff --git a/‎novelwriter/core/index.py
+4-87 b/‎novelwriter/core/index.py
+4-87
diff --git a/‎novelwriter/core/tohtml.py
+4-4 b/‎novelwriter/core/tohtml.py
+4-4
@@ -50,3 +50,6 @@ ToC.txt
 # Coverage
 /.coverage
 /coverage.*
+
+# Other
+/test.py
@@ -66,7 +66,6 @@ with pip. See :ref:`a_started` for more details.
    usage_format
    usage_shortcuts
    usage_typography
-   usage_projectformat
 
 .. toctree::
    :maxdepth: 1
@@ -80,7 +79,15 @@ with pip. See :ref:`a_started` for more details.
 
 .. toctree::
    :maxdepth: 1
-   :caption: Additional Topics
+   :caption: Additional Details
+   :hidden:
+
+   more_projectformat
+   more_counting
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Technical Topics
    :hidden:
 
    tech_locations
 
@@ -0,0 +1,98 @@
+.. _a_counting:
+
+********************
+Word and Text Counts
+********************
+
+This is an overview of how word counts and other counts of your text are performed. The counting
+rules should be relatively standard, and are compared to the LibreOffice Writer rules.
+
+The counts provided in the app on the raw text are meant to be approximate. For more accurate
+counts, you need to build your manuscript in the :guilabel:`Manuscript Tool` and check the counts
+on the generated preview.
+
+
+Text Word Counts and Stats
+==========================
+
+These are the rules for the main counts available for each document in a project.
+
+For all counts, the following rules apply.
+
+#. Short (–) and long (—) dashes are considered word separators.
+#. Any line starting with ``%`` or ``@`` is ignored.
+#. Trailing white spaces are ignored, including line breaks.
+#. Leading ``>`` and trailing ``<`` are ignored with any spaces next to them.
+#. Valid shortcodes and other commands wrapped in brackets ``[]`` are ignored.
+#. In-line Markdown syntax in text paragraphs is treated as part of the text.
+
+After the above preparation of the text, the following counts are available.
+
+**Character Count**
+   The character count is the sum of characters per line, including leading and in-text white space
+   characters, but excluding trailing white space characters. Shortcodes in the text are not
+   included, but Markdown codes are. Only headers and text are counted.
+
+**Word Count**
+   The word count is the sum of blocks of continuous characters per line separated by any number of
+   white space characters or dashes. Only headers and text are counted.
+
+**Paragraph Count**
+   The paragraph count is the number of text blocks separated by one or more empty lines. A line
+   consisting only of white spaces is considered empty.
+
+
+Manuscript Counts
+=================
+
+These are the rules for the counts available for a manuscript in the :guilabel:`Manuscript Tool`.
+The rules have been tuned to agree with LibreOffice Writer, but will vary slightly depending on the
+content of your text. LibreOffice Writer also counts the text in the page header, which the
+Manuscript Tool does not.
+
+The content of each line is counted after all formatting has been processed, so the result will be
+more accurate than the counts for text documents elsewhere in the app. The following rules apply:
+
+#. Short (–) and long (—) dashes are considered word separators.
+#. Leading and trailing white spaces are generally included, but paragraph breaks are not.
+#. Hard line breaks within a paragraph are considered white space characters.
+#. All formatting codes are ignored, including shortcodes, commands and Markdown.
+#. Scene and section separators are counted.
+#. Comments and meta data lines are counted after they are formatted.
+#. Headers are counted after they are formatted with custom formats.
+
+The following counts are available:
+
+**Header Count**
+   The number of headers in the manuscript.
+
+**Paragraph Count**
+   The number of body text paragraphs in the manuscript.
+
+**Total Word Count**
+   The number of words in the manuscript, including any comments and meta data text.
+
+**Text Word Count**
+   The number of words in body text paragraphs, excluding all other text.
+
+**Header Word Count**
+   The number of words in headers, including inserted formatting like chapter numbers, etc.
+
+**Total Character Count**
+   The number of characters on all lines, including any comments and meta data text. Paragraph
+   breaks are not counted, but in-paragraph hard line breaks are.
+
+**Text Character Count**
+   The number of characters in body text paragraphs. Paragraph breaks are not counted, but
+   in-paragraph hard line breaks are.
+
+**Header Character Count**
+   The number of characters in headers.
+
+**Text Words Character Count**
+   The number of characters in body text paragraphs considered part of a word or punctuation. That
+   is, white space characters are not counted.
+
+**Header Words Character Count**
+   The number of characters in headers considered part of a word or punctuation. That is, white
+   space characters are not counted.
@@ -100,6 +100,8 @@ status_time     = typ_stopwatch-grey.svg
 sticky-off      = typ_pin-outline.svg
 sticky-on       = typ_pin.svg
 unchecked       = mixed_input-unchecked.svg
+unfold-hide     = typ_arrow-right.svg
+unfold-show     = typ_arrow-down.svg
 up              = typ_chevron-up.svg
 view            = typ_eye.svg
 view_build      = typ_export-grey.svg
 
@@ -100,6 +100,8 @@ status_time     = typ_stopwatch-grey.svg
 sticky-off      = typ_pin-outline.svg
 sticky-on       = typ_pin.svg
 unchecked       = mixed_input-unchecked.svg
+unfold-hide     = typ_arrow-right.svg
+unfold-show     = typ_arrow-down.svg
 up              = typ_chevron-up.svg
 view            = typ_eye.svg
 view_build      = typ_export-grey.svg
 
@@ -23,8 +23,6 @@
 """
 from __future__ import annotations
 
-import re
-
 from PyQt5.QtCore import QCoreApplication, QT_TRANSLATE_NOOP
 
 from novelwriter.enum import nwBuildFmt, nwItemClass, nwItemLayout, nwOutline
@@ -70,9 +68,6 @@ class nwRegEx:
     FMT_SC = r"(?i)(?<!\\)(\[[\/\!]?(?:i|b|s|u|m|sup|sub)\])"
     FMT_SV = r"(?<!\\)(\[(?i)(?:fn|footnote):)(.+?)(?<!\\)(\])"
 
-    # Pre-Compiled RegEx
-    RX_SC = re.compile(FMT_SC)
-
 # END Class nwRegEx
 
 
 
@@ -52,14 +52,15 @@ class NWBuildDocument:
     manuscript, based on a build definition object (BuildSettings).
     """
 
-    __slots__ = ("_project", "_build", "_queue", "_error", "_cache")
+    __slots__ = ("_project", "_build", "_queue", "_error", "_cache", "_count")
 
-    def __init__(self, project: NWProject, build: BuildSettings) -> None:
+    def __init__(self, project: NWProject, build: BuildSettings, doCount: bool = False) -> None:
         self._project = project
         self._build = build
         self._queue = []
         self._error = None
         self._cache = None
+        self._count = doCount
         return
 
     ##
@@ -314,11 +315,15 @@ def _doBuild(self, bldObj: Tokenizer, tHandle: str, convert: bool = True) -> boo
                     bldObj.addRootHeading(tHandle)
                     if convert:
                         bldObj.doConvert()
+                    if self._count:
+                        bldObj.countStats()
                 elif tItem.isFileType():
                     bldObj.setText(tHandle)
                     bldObj.doPreProcessing()
                     bldObj.tokenizeText()
                     bldObj.doHeaders()
+                    if self._count:
+                        bldObj.countStats()
                     if convert:
                         bldObj.doConvert()
                 else:
 
@@ -3,7 +3,6 @@
 ===========================
 
 File History:
-Created: 2019-04-22 [0.0.1]  countWords
 Created: 2019-05-27 [0.1.4]  NWIndex
 Created: 2022-05-28 [2.0rc1] IndexItem
 Created: 2022-05-28 [2.0rc1] IndexHeading
@@ -40,7 +39,8 @@
 from novelwriter.enum import nwComment, nwItemClass, nwItemType, nwItemLayout
 from novelwriter.error import logException
 from novelwriter.common import checkInt, isHandle, isItemClass, isTitleTag, jsonEncode
-from novelwriter.constants import nwFiles, nwKeyWords, nwRegEx, nwUnicode, nwHeaders
+from novelwriter.constants import nwFiles, nwKeyWords, nwHeaders
+from novelwriter.text.counting import standardCounter
 
 if TYPE_CHECKING:  # pragma: no cover
     from novelwriter.core.item import NWItem
@@ -266,7 +266,7 @@ def scanText(self, tHandle: str, text: str, blockSignal: bool = False) -> bool:
         self._itemIndex.add(tHandle, tItem)
 
         # Run word counter for the whole text
-        cC, wC, pC = countWords(text)
+        cC, wC, pC = standardCounter(text)
         tItem.setCharCount(cC)
         tItem.setWordCount(wC)
         tItem.setParaCount(pC)
@@ -400,7 +400,7 @@ def _splitHeading(self, line: str) -> tuple[str, str]:
 
     def _indexWordCounts(self, tHandle: str, text: str, sTitle: str) -> None:
         """Count text stats and save the counts to the index."""
-        cC, wC, pC = countWords(text)
+        cC, wC, pC = standardCounter(text)
         self._itemIndex.setHeadingCounts(tHandle, sTitle, cC, wC, pC)
         return
 
@@ -1315,86 +1315,3 @@ def processComment(text: str) -> tuple[nwComment, str, int]:
     if content and (clean := classifier.strip().lower()) in CLASSIFIERS:
         return CLASSIFIERS[clean], content.strip(), text.find(":") + 1
     return nwComment.PLAIN, check, 0
-
-
-def countWords(text: str) -> tuple[int, int, int]:
-    """Count words in a piece of text, skipping special syntax and
-    comments.
-    """
-    charCount = 0
-    wordCount = 0
-    paraCount = 0
-    prevEmpty = True
-
-    if not isinstance(text, str):
-        return charCount, wordCount, paraCount
-
-    # We need to treat dashes as word separators for counting words.
-    # The check+replace approach is much faster than direct replace for
-    # large texts, and a bit slower for small texts, but in the latter
-    # case it doesn't really matter.
-    if nwUnicode.U_ENDASH in text:
-        text = text.replace(nwUnicode.U_ENDASH, " ")
-    if nwUnicode.U_EMDASH in text:
-        text = text.replace(nwUnicode.U_EMDASH, " ")
-
-    # Strip shortcodes
-    if "[" in text:
-        text = nwRegEx.RX_SC.sub("", text)
-
-    for line in text.splitlines():
-
-        countPara = True
-
-        if not line:
-            prevEmpty = True
-            continue
-
-        if line[0] == "@" or line[0] == "%":
-            continue
-
-        if line[0] == "[":
-            check = line.lower()
-            if check.startswith(("[newpage]", "[new page]", "[vspace]")):
-                continue
-            elif check.startswith("[vspace:") and line.endswith("]"):
-                continue
-
-        elif line[0] == "#":
-            if line[:5] == "#### ":
-                line = line[5:]
-                countPara = False
-            elif line[:4] == "### ":
-                line = line[4:]
-                countPara = False
-            elif line[:3] == "## ":
-                line = line[3:]
-                countPara = False
-            elif line[:2] == "# ":
-                line = line[2:]
-                countPara = False
-            elif line[:3] == "#! ":
-                line = line[3:]
-                countPara = False
-            elif line[:4] == "##! ":
-                line = line[4:]
-                countPara = False
-
-        elif line[0] == ">" or line[-1] == "<":
-            if line[:2] == ">>":
-                line = line[2:].lstrip(" ")
-            elif line[:1] == ">":
-                line = line[1:].lstrip(" ")
-            if line[-2:] == "<<":
-                line = line[:-2].rstrip(" ")
-            elif line[-1:] == "<":
-                line = line[:-1].rstrip(" ")
-
-        wordCount += len(line.split())
-        charCount += len(line)
-        if countPara and prevEmpty:
-            paraCount += 1
-
-        prevEmpty = not countPara
-
-    return charCount, wordCount, paraCount
@@ -368,13 +368,12 @@ def replaceTabs(self, nSpaces: int = 8, spaceChar: str = "&nbsp;") -> None:
 
     def getStyleSheet(self) -> list[str]:
         """Generate a stylesheet for the current settings."""
-        styles = []
         if not self._cssStyles:
-            return styles
+            return []
 
         mScale = self._lineHeight/1.15
-        textAlign = "justify" if self._doJustify else "left"
 
+        styles = []
         styles.append("body {{font-family: '{0:s}'; font-size: {1:d}pt;}}".format(
             self._textFont, self._textSize
         ))
@@ -384,7 +383,7 @@ def getStyleSheet(self) -> list[str]:
             "margin-top: {2:.2f}em; margin-bottom: {3:.2f}em;"
             "}}"
         ).format(
-            textAlign,
+            "justify" if self._doJustify else "left",
             round(100 * self._lineHeight),
             mScale * self._marginText[0],
             mScale * self._marginText[1],
@@ -449,6 +448,7 @@ def getStyleSheet(self) -> list[str]:
         ))
 
         styles.append("a {color: rgb(66, 113, 174);}")
+        styles.append("mark {background: rgb(255, 255, 166);}")
         styles.append(".tags {color: rgb(245, 135, 31); font-weight: bold;}")
         styles.append(".break {text-align: left;}")
         styles.append(".synopsis {font-style: italic;}")