|
3 | 3 | ===========================
|
4 | 4 |
|
5 | 5 | File History:
|
6 |
| -Created: 2019-04-22 [0.0.1] countWords |
7 | 6 | Created: 2019-05-27 [0.1.4] NWIndex
|
8 | 7 | Created: 2022-05-28 [2.0rc1] IndexItem
|
9 | 8 | Created: 2022-05-28 [2.0rc1] IndexHeading
|
|
40 | 39 | from novelwriter.enum import nwComment, nwItemClass, nwItemType, nwItemLayout
|
41 | 40 | from novelwriter.error import logException
|
42 | 41 | from novelwriter.common import checkInt, isHandle, isItemClass, isTitleTag, jsonEncode
|
43 |
| -from novelwriter.constants import nwFiles, nwKeyWords, nwRegEx, nwUnicode, nwHeaders |
| 42 | +from novelwriter.constants import nwFiles, nwKeyWords, nwHeaders |
| 43 | +from novelwriter.text.counting import standardCounter |
44 | 44 |
|
45 | 45 | if TYPE_CHECKING: # pragma: no cover
|
46 | 46 | from novelwriter.core.item import NWItem
|
@@ -266,7 +266,7 @@ def scanText(self, tHandle: str, text: str, blockSignal: bool = False) -> bool:
|
266 | 266 | self._itemIndex.add(tHandle, tItem)
|
267 | 267 |
|
268 | 268 | # Run word counter for the whole text
|
269 |
| - cC, wC, pC = countWords(text) |
| 269 | + cC, wC, pC = standardCounter(text) |
270 | 270 | tItem.setCharCount(cC)
|
271 | 271 | tItem.setWordCount(wC)
|
272 | 272 | tItem.setParaCount(pC)
|
@@ -400,7 +400,7 @@ def _splitHeading(self, line: str) -> tuple[str, str]:
|
400 | 400 |
|
401 | 401 | def _indexWordCounts(self, tHandle: str, text: str, sTitle: str) -> None:
|
402 | 402 | """Count text stats and save the counts to the index."""
|
403 |
| - cC, wC, pC = countWords(text) |
| 403 | + cC, wC, pC = standardCounter(text) |
404 | 404 | self._itemIndex.setHeadingCounts(tHandle, sTitle, cC, wC, pC)
|
405 | 405 | return
|
406 | 406 |
|
@@ -1315,86 +1315,3 @@ def processComment(text: str) -> tuple[nwComment, str, int]:
|
1315 | 1315 | if content and (clean := classifier.strip().lower()) in CLASSIFIERS:
|
1316 | 1316 | return CLASSIFIERS[clean], content.strip(), text.find(":") + 1
|
1317 | 1317 | return nwComment.PLAIN, check, 0
|
1318 |
| - |
1319 |
| - |
1320 |
| -def countWords(text: str) -> tuple[int, int, int]: |
1321 |
| - """Count words in a piece of text, skipping special syntax and |
1322 |
| - comments. |
1323 |
| - """ |
1324 |
| - charCount = 0 |
1325 |
| - wordCount = 0 |
1326 |
| - paraCount = 0 |
1327 |
| - prevEmpty = True |
1328 |
| - |
1329 |
| - if not isinstance(text, str): |
1330 |
| - return charCount, wordCount, paraCount |
1331 |
| - |
1332 |
| - # We need to treat dashes as word separators for counting words. |
1333 |
| - # The check+replace approach is much faster than direct replace for |
1334 |
| - # large texts, and a bit slower for small texts, but in the latter |
1335 |
| - # case it doesn't really matter. |
1336 |
| - if nwUnicode.U_ENDASH in text: |
1337 |
| - text = text.replace(nwUnicode.U_ENDASH, " ") |
1338 |
| - if nwUnicode.U_EMDASH in text: |
1339 |
| - text = text.replace(nwUnicode.U_EMDASH, " ") |
1340 |
| - |
1341 |
| - # Strip shortcodes |
1342 |
| - if "[" in text: |
1343 |
| - text = nwRegEx.RX_SC.sub("", text) |
1344 |
| - |
1345 |
| - for line in text.splitlines(): |
1346 |
| - |
1347 |
| - countPara = True |
1348 |
| - |
1349 |
| - if not line: |
1350 |
| - prevEmpty = True |
1351 |
| - continue |
1352 |
| - |
1353 |
| - if line[0] == "@" or line[0] == "%": |
1354 |
| - continue |
1355 |
| - |
1356 |
| - if line[0] == "[": |
1357 |
| - check = line.lower() |
1358 |
| - if check.startswith(("[newpage]", "[new page]", "[vspace]")): |
1359 |
| - continue |
1360 |
| - elif check.startswith("[vspace:") and line.endswith("]"): |
1361 |
| - continue |
1362 |
| - |
1363 |
| - elif line[0] == "#": |
1364 |
| - if line[:5] == "#### ": |
1365 |
| - line = line[5:] |
1366 |
| - countPara = False |
1367 |
| - elif line[:4] == "### ": |
1368 |
| - line = line[4:] |
1369 |
| - countPara = False |
1370 |
| - elif line[:3] == "## ": |
1371 |
| - line = line[3:] |
1372 |
| - countPara = False |
1373 |
| - elif line[:2] == "# ": |
1374 |
| - line = line[2:] |
1375 |
| - countPara = False |
1376 |
| - elif line[:3] == "#! ": |
1377 |
| - line = line[3:] |
1378 |
| - countPara = False |
1379 |
| - elif line[:4] == "##! ": |
1380 |
| - line = line[4:] |
1381 |
| - countPara = False |
1382 |
| - |
1383 |
| - elif line[0] == ">" or line[-1] == "<": |
1384 |
| - if line[:2] == ">>": |
1385 |
| - line = line[2:].lstrip(" ") |
1386 |
| - elif line[:1] == ">": |
1387 |
| - line = line[1:].lstrip(" ") |
1388 |
| - if line[-2:] == "<<": |
1389 |
| - line = line[:-2].rstrip(" ") |
1390 |
| - elif line[-1:] == "<": |
1391 |
| - line = line[:-1].rstrip(" ") |
1392 |
| - |
1393 |
| - wordCount += len(line.split()) |
1394 |
| - charCount += len(line) |
1395 |
| - if countPara and prevEmpty: |
1396 |
| - paraCount += 1 |
1397 |
| - |
1398 |
| - prevEmpty = not countPara |
1399 |
| - |
1400 |
| - return charCount, wordCount, paraCount |
0 commit comments