Skip to content

Commit

Permalink
Clean xgettext strings and ignore empties
Browse files Browse the repository at this point in the history
Reference: #11
Reported-by: Armijn Hemel @armijnhemel
Signed-off-by: Philippe Ombredanne <[email protected]>
  • Loading branch information
pombredanne committed Mar 15, 2024
1 parent d691562 commit d7a4a36
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 33 deletions.
66 changes: 56 additions & 10 deletions src/source_inspector/strings_xgettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#

import logging
import string

import attr
from commoncode import command
Expand Down Expand Up @@ -55,13 +56,13 @@ def get_source_strings(location, **kwargs):
"""
Return a mapping of strings for a source file at ``location``.
"""
return dict(source_strings=list(collect_strings(location=location, strip=True)))
return dict(source_strings=list(collect_strings(location=location, clean=True)))


def collect_strings(location, strip=False):
def collect_strings(location, clean=True):
"""
Yield mappings of strings collected from file at location.
Strip strings if ``strip`` is True.
Clean strings if ``clean`` is True.
"""
if not is_xgettext_installed():
return
Expand All @@ -82,13 +83,13 @@ def collect_strings(location, strip=False):
if rc != 0:
raise Exception(open(err).read())

yield from parse_po_text(po_text=result, strip=strip)
yield from parse_po_text(po_text=result, clean=clean)


def parse_po_text(po_text, strip=False):
def parse_po_text(po_text, clean=True):
"""
Yield mappings of strings collected from the ``po_text`` string.
Strip strings if ``strip`` is True.
Clean strings if ``clean`` is True.
The po text lines look like this:
- Blocks separated by 2 lines.
Expand Down Expand Up @@ -143,12 +144,57 @@ def parse_po_text(po_text, strip=False):
elif line.startswith('"'):
strings.append(line)

strings = [l.strip('"').replace("\\n", "\n") for l in strings]
strings = [l.strip('"') for l in strings]
string = "".join(strings)
if strip:
string = string.strip()
if clean:
string = clean_string(string)
if string:
yield dict(line_numbers=line_numbers, string=string)

yield dict(line_numbers=line_numbers, string=string)

# Code points of the control characters removed by ``clean_string``, in
# addition to the C escapes \a, \b, \v and \f listed below. Tab, line feed and
# carriage return are not in this table and are therefore preserved.
_NON_PRINTABLE_CODES = (*range(0x01, 0x07), *range(0x0E, 0x20), 0x7F)

# Mapping of {escaped text form: actual character}. The escaped form is the
# literal backslash sequence as it appears in the text (e.g. the four
# characters ``\x1b``); the value is the corresponding real control character.
# Built once at import time rather than on every call.
_NON_PRINTABLES = {
    "\\a": "\a",
    "\\b": "\b",
    "\\v": "\v",
    "\\f": "\f",
    **{f"\\x{code:02x}": chr(code) for code in _NON_PRINTABLE_CODES},
}


def clean_string(s):
    """
    Return a cleaned and normalized string. The returned string may be empty
    when nothing meaningful is left after cleaning; it is never None.

    Cleaning consists of:
    - removing leading and trailing double quotes,
    - converting escaped ``\\n`` sequences to real newlines,
    - removing selected non-printable characters, both as real characters and
      as their escaped text form (such as ``\\x1b`` or ``\\a``),
    - stripping leading and trailing whitespace.
    """
    s = s.strip('"')
    s = s.replace("\\n", "\n")
    s = s.strip()

    for escaped, char in _NON_PRINTABLES.items():
        s = s.replace(escaped, "")
        s = s.replace(char, "")

    # Removing non-printables can expose whitespace that was not at the edges
    # before (e.g. "foo \x1b" -> "foo "), or leave a whitespace-only string.
    # Strip again so that callers testing for emptiness can discard these.
    return s.strip()


_IS_XGETTEXT_INSTALLED = None
Expand Down
22 changes: 1 addition & 21 deletions tests/data/strings_xgettext/lineedit.c-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,6 @@
"path": "lineedit.c",
"type": "file",
"source_strings": [
{
"line_numbers": [
126
],
"string": "\u001b"
},
{
"line_numbers": [
128,
Expand All @@ -24,14 +18,6 @@
],
"string": "HOME"
},
{
"line_numbers": [
275,
1166,
2858
],
"string": ""
},
{
"line_numbers": [
454,
Expand Down Expand Up @@ -79,12 +65,6 @@
],
"string": "."
},
{
"line_numbers": [
905
],
"string": ""
},
{
"line_numbers": [
1000
Expand Down Expand Up @@ -200,7 +180,7 @@
"line_numbers": [
3052
],
"string": "\\\\[\\\\033[32;1m\\\\]\\\\u@\\\\[\\\\x1b[33;1m\\\\]\\\\h:\\\\[\\\\033[34;1m\\\\]\\\\w\\\\[\\\\033[35;1m\\\\] \\\\!\\\\[\\\\e[36;1m\\\\]\\\\$ \\\\[\\\\E[m\\\\]"
"string": "\\\\[\\\\033[32;1m\\\\]\\\\u@\\\\[\\[33;1m\\\\]\\\\h:\\\\[\\\\033[34;1m\\\\]\\\\w\\\\[\\\\033[35;1m\\\\] \\\\!\\\\[\\\\e[36;1m\\\\]\\\\$ \\\\[\\\\E[m\\\\]"
},
{
"line_numbers": [
Expand Down
4 changes: 2 additions & 2 deletions tests/test_symbols_xgettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_parse_po_text(self):
msgid "Collect source symbols using Universal ctags."
msgstr ""
"""
results = list(parse_po_text(test))
results = list(parse_po_text(test, clean=False))
expected = [
{
"line_numbers": [
Expand Down Expand Up @@ -80,7 +80,7 @@ def test_parse_po_text(self):

assert results == expected

results = list(parse_po_text(test, strip=True))
results = list(parse_po_text(test, clean=True))
expected = [
{
"line_numbers": [
Expand Down

0 comments on commit d7a4a36

Please sign in to comment.