Skip to content

Commit 70a4b3a

Browse files
Jonathan Ross Rogers (JonathanRRogers)
Jonathan Ross Rogers
authored and committed
extract: Determine python-format flag explicitly
During extraction, Message instances can be created with the "python-format" flag, indicating that the message string contains Python percent-formatting placeholders. To avoid setting the flag erroneously because the string source is not Python code or otherwise is not expected to contain such placeholders, the extractor interface must be extended to allow extractor functions to indicate which flags are valid. Fixes python-babel#35
1 parent 7ed6cc5 commit 70a4b3a

File tree

6 files changed

+85
-64
lines changed

6 files changed

+85
-64
lines changed

babel/messages/catalog.py

+8-4
Original file line number | Diff line number | Diff line change
@@ -99,10 +99,7 @@ def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
9999
self.string = string
100100
self.locations = list(distinct(locations))
101101
self.flags = set(flags)
102-
if id and self.python_format:
103-
self.flags.add('python-format')
104-
else:
105-
self.flags.discard('python-format')
102+
106103
self.auto_comments = list(distinct(auto_comments))
107104
self.user_comments = list(distinct(user_comments))
108105
if isinstance(previous_id, string_types):
@@ -112,6 +109,13 @@ def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
112109
self.lineno = lineno
113110
self.context = context
114111

112+
def determine_python_format(self):
113+
"""Sets python-format flag if message contains a format string"""
114+
if self.id and self.python_format:
115+
self.flags.add('python-format')
116+
else:
117+
self.flags.discard('python-format')
118+
115119
def __repr__(self):
116120
return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
117121
list(self.flags))

babel/messages/extract.py

+16-9
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ def extract_from_dir(dirname=None, method_map=DEFAULT_MAPPING,
6767
"""Extract messages from any source files found in the given directory.
6868
6969
This function generates tuples of the form ``(filename, lineno, message,
70-
comments, context)``.
70+
comments, context, flags)``.
7171
7272
Which extraction method is used per file is determined by the `method_map`
7373
parameter, which maps extended glob patterns to extraction method names.
@@ -220,7 +220,7 @@ def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
220220
comment_tags=(), options=None, strip_comment_tags=False):
221221
"""Extract messages from a specific file.
222222
223-
This function returns a list of tuples of the form ``(lineno, message, comments, context)``.
223+
This function returns a list of tuples of the form ``(lineno, message, comments, context, flags)``.
224224
225225
:param filename: the path to the file to extract messages from
226226
:param method: a string specifying the extraction method (.e.g. "python")
@@ -246,7 +246,7 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
246246
"""Extract messages from the given file-like object using the specified
247247
extraction method.
248248
249-
This function returns tuples of the form ``(lineno, message, comments, context)``.
249+
This function returns tuples of the form ``(lineno, message, comments, context, flags)``.
250250
251251
The implementation dispatches the actual extraction to plugins, based on the
252252
value of the ``method`` parameter.
@@ -259,7 +259,7 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
259259
>>> from babel._compat import BytesIO
260260
>>> for message in extract('python', BytesIO(source)):
261261
... print(message)
262-
(3, u'Hello, world!', [], None)
262+
(3, u'Hello, world!', [], None, ())
263263
264264
:param method: an extraction method (a callable), or
265265
a string specifying the extraction method (.e.g. "python");
@@ -316,10 +316,17 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
316316
if func is None:
317317
raise ValueError('Unknown extraction method %r' % method)
318318

319-
results = func(fileobj, keywords.keys(), comment_tags,
320-
options=options or {})
319+
for result in func(fileobj, keywords.keys(), comment_tags,
320+
options=options or {}):
321+
flags = ()
322+
if len(result) == 4:
323+
lineno, funcname, messages, comments = result
324+
elif len(result) == 5:
325+
lineno, funcname, messages, comments, flags = result
326+
else:
327+
raise ValueError(
328+
'Extraction function must yield tuples with 4 or 5 values')
321329

322-
for lineno, funcname, messages, comments in results:
323330
if funcname:
324331
spec = keywords[funcname] or (1,)
325332
else:
@@ -370,7 +377,7 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
370377

371378
if strip_comment_tags:
372379
_strip_comment_tags(comments, comment_tags)
373-
yield lineno, messages, comments, context
380+
yield lineno, messages, comments, context, flags
374381

375382

376383
def extract_nothing(fileobj, keywords, comment_tags, options):
@@ -465,7 +472,7 @@ def extract_python(fileobj, keywords, comment_tags, options):
465472
translator_comments = []
466473

467474
yield (message_lineno, funcname, messages,
468-
[comment[1] for comment in translator_comments])
475+
[comment[1] for comment in translator_comments], ())
469476

470477
funcname = lineno = message_lineno = None
471478
call_stack = -1

babel/messages/frontend.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -467,13 +467,13 @@ def callback(filename, method, options):
467467
callback=callback,
468468
strip_comment_tags=self.strip_comments
469469
)
470-
for filename, lineno, message, comments, context in extracted:
470+
for fname, lineno, msg, comments, context, flags in extracted:
471471
if os.path.isfile(path):
472-
filepath = filename # already normalized
472+
filepath = fname # already normalized
473473
else:
474-
filepath = os.path.normpath(os.path.join(path, filename))
474+
filepath = os.path.normpath(os.path.join(path, fname))
475475

476-
catalog.add(message, None, [(filepath, lineno)],
476+
catalog.add(msg, None, [(filepath, lineno)], flags=flags,
477477
auto_comments=comments, context=context)
478478

479479
self.log.info('writing PO template file to %s', self.output_file)

babel/messages/pofile.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -442,9 +442,9 @@ def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
442442
message catalog to the provided file-like object.
443443
444444
>>> catalog = Catalog()
445-
>>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
445+
>>> message = catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
446446
... flags=('fuzzy',))
447-
<Message...>
447+
>>> message.determine_python_format()
448448
>>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
449449
<Message...>
450450
>>> from babel._compat import BytesIO

tests/messages/test_extract.py

+46-36
Original file line number | Diff line number | Diff line change
@@ -37,33 +37,34 @@ def test_nested_calls(self):
3737
messages = list(extract.extract_python(buf,
3838
extract.DEFAULT_KEYWORDS.keys(),
3939
[], {}))
40-
self.assertEqual([
41-
(1, '_', None, []),
42-
(2, 'ungettext', (None, None, None), []),
43-
(3, 'ungettext', (u'Babel', None, None), []),
44-
(4, 'ungettext', (None, u'Babels', None), []),
45-
(5, 'ungettext', (u'bunny', u'bunnies', None), []),
46-
(6, 'ungettext', (None, u'bunnies', None), []),
47-
(7, '_', None, []),
48-
(8, 'gettext', u'Rabbit', []),
49-
(9, 'dgettext', (u'wiki', None), []),
50-
(10, 'dngettext', (None, u'Page', u'Pages', None), [])],
51-
messages)
40+
self.assertEqual(
41+
[
42+
(1, '_', None, [], ()),
43+
(2, 'ungettext', (None, None, None), [], ()),
44+
(3, 'ungettext', (u'Babel', None, None), [], ()),
45+
(4, 'ungettext', (None, u'Babels', None), [], ()),
46+
(5, 'ungettext', (u'bunny', u'bunnies', None), [], ()),
47+
(6, 'ungettext', (None, u'bunnies', None), [], ()),
48+
(7, '_', None, [], ()),
49+
(8, 'gettext', u'Rabbit', [], ()),
50+
(9, 'dgettext', (u'wiki', None), [], ()),
51+
(10, 'dngettext', (None, u'Page', u'Pages', None), [], ())
52+
], messages)
5253

5354
def test_extract_default_encoding_ascii(self):
5455
buf = BytesIO(b'_("a")')
5556
messages = list(extract.extract_python(
5657
buf, list(extract.DEFAULT_KEYWORDS), [], {},
5758
))
5859
# Should work great in both py2 and py3
59-
self.assertEqual([(1, '_', 'a', [])], messages)
60+
self.assertEqual([(1, '_', 'a', [], ())], messages)
6061

6162
def test_extract_default_encoding_utf8(self):
6263
buf = BytesIO(u'_("☃")'.encode('UTF-8'))
6364
messages = list(extract.extract_python(
6465
buf, list(extract.DEFAULT_KEYWORDS), [], {},
6566
))
66-
self.assertEqual([(1, '_', u'☃', [])], messages)
67+
self.assertEqual([(1, '_', u'☃', [], ())], messages)
6768

6869
def test_nested_comments(self):
6970
buf = BytesIO(b"""\
@@ -73,7 +74,7 @@ def test_nested_comments(self):
7374
""")
7475
messages = list(extract.extract_python(buf, ('ngettext',),
7576
['TRANSLATORS:'], {}))
76-
self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [])],
77+
self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [], ())],
7778
messages)
7879

7980
def test_comments_with_calls_that_spawn_multiple_lines(self):
@@ -98,21 +99,21 @@ def test_comments_with_calls_that_spawn_multiple_lines(self):
9899

99100
{'strip_comment_tags': False}))
100101
self.assertEqual((6, '_', 'Locale deleted.',
101-
[u'NOTE: This Comment SHOULD Be Extracted']),
102+
[u'NOTE: This Comment SHOULD Be Extracted'], ()),
102103
messages[1])
103104
self.assertEqual((10, 'ngettext', (u'Foo deleted.', u'Foos deleted.',
104105
None),
105-
[u'NOTE: This Comment SHOULD Be Extracted']),
106+
[u'NOTE: This Comment SHOULD Be Extracted'], ()),
106107
messages[2])
107108
self.assertEqual((3, 'ngettext',
108-
(u'Catalog deleted.',
109-
u'Catalogs deleted.', None),
110-
[u'NOTE: This Comment SHOULD Be Extracted']),
109+
(u'Catalog deleted.',
110+
u'Catalogs deleted.', None),
111+
[u'NOTE: This Comment SHOULD Be Extracted'], ()),
111112
messages[0])
112113
self.assertEqual((15, 'ngettext', (u'Bar deleted.', u'Bars deleted.',
113114
None),
114115
[u'NOTE: This Comment SHOULD Be Extracted',
115-
u'NOTE: And This One Too']),
116+
u'NOTE: And This One Too'], ()),
116117
messages[3])
117118

118119
def test_declarations(self):
@@ -129,9 +130,9 @@ class Meta:
129130
messages = list(extract.extract_python(buf,
130131
extract.DEFAULT_KEYWORDS.keys(),
131132
[], {}))
132-
self.assertEqual([(3, '_', u'Page arg 1', []),
133-
(3, '_', u'Page arg 2', []),
134-
(8, '_', u'log entry', [])],
133+
self.assertEqual([(3, '_', u'Page arg 1', [], ()),
134+
(3, '_', u'Page arg 2', [], ()),
135+
(8, '_', u'log entry', [], ())],
135136
messages)
136137

137138
def test_multiline(self):
@@ -143,8 +144,8 @@ def test_multiline(self):
143144
count)
144145
""")
145146
messages = list(extract.extract_python(buf, ('ngettext',), [], {}))
146-
self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), []),
147-
(3, 'ngettext', (u'elvis', u'elvises', None), [])],
147+
self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [], ()),
148+
(3, 'ngettext', (u'elvis', u'elvises', None), [], ())],
148149
messages)
149150

150151
def test_npgettext(self):
@@ -156,8 +157,8 @@ def test_npgettext(self):
156157
count)
157158
""")
158159
messages = list(extract.extract_python(buf, ('npgettext',), [], {}))
159-
self.assertEqual([(1, 'npgettext', (u'Strings', u'pylon', u'pylons', None), []),
160-
(3, 'npgettext', (u'Strings', u'elvis', u'elvises', None), [])],
160+
self.assertEqual([(1, 'npgettext', (u'Strings', u'pylon', u'pylons', None), [], ()),
161+
(3, 'npgettext', (u'Strings', u'elvis', u'elvises', None), [], ())],
161162
messages)
162163
buf = BytesIO(b"""\
163164
msg = npgettext('Strings', 'pylon', # TRANSLATORS: shouldn't be
@@ -166,7 +167,7 @@ def test_npgettext(self):
166167
""")
167168
messages = list(extract.extract_python(buf, ('npgettext',),
168169
['TRANSLATORS:'], {}))
169-
self.assertEqual([(1, 'npgettext', (u'Strings', u'pylon', u'pylons', None), [])],
170+
self.assertEqual([(1, 'npgettext', (u'Strings', u'pylon', u'pylons', None), [], ())],
170171
messages)
171172

172173
def test_triple_quoted_strings(self):
@@ -178,9 +179,9 @@ def test_triple_quoted_strings(self):
178179
messages = list(extract.extract_python(buf,
179180
extract.DEFAULT_KEYWORDS.keys(),
180181
[], {}))
181-
self.assertEqual([(1, '_', u'pylons', []),
182-
(2, 'ngettext', (u'elvis', u'elvises', None), []),
183-
(3, 'ngettext', (u'elvis', u'elvises', None), [])],
182+
self.assertEqual([(1, '_', u'pylons', [], ()),
183+
(2, 'ngettext', (u'elvis', u'elvises', None), [], ()),
184+
(3, 'ngettext', (u'elvis', u'elvises', None), [], ())],
184185
messages)
185186

186187
def test_multiline_strings(self):
@@ -196,7 +197,7 @@ def test_multiline_strings(self):
196197
[(1, '_',
197198
u'This module provides internationalization and localization\n'
198199
'support for your Python programs by providing an interface to '
199-
'the GNU\ngettext message catalog library.', [])],
200+
'the GNU\ngettext message catalog library.', [], ())],
200201
messages)
201202

202203
def test_concatenated_strings(self):
@@ -456,6 +457,10 @@ def test_nested_messages(self):
456457
self.assertEqual([], messages[7][3])
457458

458459

460+
def extract_bad(fileobj, keywords, comment_tags, options):
461+
yield (None,)
462+
463+
459464
class ExtractTestCase(unittest.TestCase):
460465

461466
def test_invalid_filter(self):
@@ -474,14 +479,19 @@ def test_invalid_filter(self):
474479
messages = \
475480
list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [],
476481
{}))
477-
self.assertEqual([(5, (u'bunny', u'bunnies'), [], None),
478-
(8, u'Rabbit', [], None),
479-
(10, (u'Page', u'Pages'), [], None)], messages)
482+
self.assertEqual([(5, (u'bunny', u'bunnies'), [], None, ()),
483+
(8, u'Rabbit', [], None, ()),
484+
(10, (u'Page', u'Pages'), [], None, ())], messages)
480485

481486
def test_invalid_extract_method(self):
482487
buf = BytesIO(b'')
483488
self.assertRaises(ValueError, list, extract.extract('spam', buf))
484489

490+
def test_bad_extract_function(self):
491+
self.assertRaises(
492+
ValueError, list,
493+
extract.extract('tests.messages.test_extract:extract_bad', ''))
494+
485495
def test_different_signatures(self):
486496
buf = BytesIO(b"""
487497
foo = _('foo', 'bar')

tests/messages/test_js_extract.py

+9-9
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,9 @@ def test_simple_extract():
1414
list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS,
1515
[], {}))
1616

17-
assert messages == [(1, 'simple', [], None),
18-
(2, 'simple', [], None),
19-
(3, ('s', 'p'), [], None)]
17+
assert messages == [(1, 'simple', [], None, ()),
18+
(2, 'simple', [], None, ()),
19+
(3, ('s', 'p'), [], None, ())]
2020

2121

2222
def test_various_calls():
@@ -36,9 +36,9 @@ def test_various_calls():
3636
list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [],
3737
{}))
3838
assert messages == [
39-
(5, (u'bunny', u'bunnies'), [], None),
40-
(8, u'Rabbit', [], None),
41-
(10, (u'Page', u'Pages'), [], None)
39+
(5, (u'bunny', u'bunnies'), [], None, ()),
40+
(8, u'Rabbit', [], None, ()),
41+
(10, (u'Page', u'Pages'), [], None, ())
4242
]
4343

4444

@@ -132,7 +132,7 @@ def test_dotted_keyword_extract():
132132
extract.extract('javascript', buf, {"com.corporate.i18n.formatMessage": None}, [], {})
133133
)
134134

135-
assert messages == [(1, 'Insert coin to continue', [], None)]
135+
assert messages == [(1, 'Insert coin to continue', [], None, ())]
136136

137137

138138
def test_template_string_standard_usage():
@@ -141,7 +141,7 @@ def test_template_string_standard_usage():
141141
extract.extract('javascript', buf, {"gettext": None}, [], {})
142142
)
143143

144-
assert messages == [(1, 'Very template, wow', [], None)]
144+
assert messages == [(1, 'Very template, wow', [], None, ())]
145145

146146

147147
def test_template_string_tag_usage():
@@ -150,4 +150,4 @@ def test_template_string_tag_usage():
150150
extract.extract('javascript', buf, {"i18n": None}, [], {})
151151
)
152152

153-
assert messages == [(1, 'Tag template, wow', [], None)]
153+
assert messages == [(1, 'Tag template, wow', [], None, ())]

0 commit comments

Comments
 (0)