Skip to content

Commit

Permalink
JavaScript: Add JSX/E4X tag lexing
Browse files Browse the repository at this point in the history
Fixes #280
  • Loading branch information
akx committed Feb 7, 2016
1 parent d4cafa4 commit 8fee76c
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 4 deletions.
4 changes: 3 additions & 1 deletion babel/messages/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,8 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
:param comment_tags: a list of translator tags to search for and include
in the results
:param options: a dictionary of additional options (optional)
Supported options are:
* `jsx` -- set to false to disable JSX/E4X support.
"""
from babel.messages.jslexer import tokenize, unquote_string
funcname = message_lineno = None
Expand All @@ -517,7 +519,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
last_token = None
call_stack = -1

for token in tokenize(fileobj.read().decode(encoding)):
for token in tokenize(fileobj.read().decode(encoding), jsx=options.get("jsx", True)):
if token.type == 'operator' and token.value == '(':
if funcname:
message_lineno = token.lineno
Expand Down
25 changes: 22 additions & 3 deletions babel/messages/jslexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

Token = namedtuple('Token', 'type value lineno')

rules = [
_rules = [
(None, re.compile(r'\s+(?u)')),
(None, re.compile(r'<!--.*')),
('linecomment', re.compile(r'//.*')),
Expand All @@ -43,6 +43,7 @@
([eE][-+]?\d+)? |
(0x[a-fA-F0-9]+)
)''')),
('jsx_tag', re.compile(r'<(?:/?)\w+.+?>', re.I)), # May be mangled in `get_rules`
('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
('string', re.compile(r'''(?xs)(
'(?:[^'\\]*(?:\\.[^'\\]*)*)' |
Expand All @@ -51,6 +52,20 @@
]


def get_rules(jsx):
    """
    Build the list of tokenization rules for the requested syntax options.

    Internal to this module.

    :param jsx: when false, every rule whose token type contains ``'jsx'``
                is left out; when true, all entries of ``_rules`` are kept.
    :return: a new list of ``(token_type, rule)`` tuples, in the same order
             as ``_rules``.
    """
    # A rule is kept unless JSX is disabled *and* it is a JSX-specific rule.
    # Rules with a falsy token type (e.g. ``None``) are always kept.
    return [
        (kind, pattern)
        for kind, pattern in _rules
        if jsx or not kind or 'jsx' not in kind
    ]


def indicates_division(token):
"""A helper function that helps the tokenizer to decide if the current
token may be followed by a division operator.
Expand Down Expand Up @@ -116,13 +131,17 @@ def unquote_string(string):
return u''.join(result)


def tokenize(source):
"""Tokenize a JavaScript source. Returns a generator of tokens.
def tokenize(source, jsx=True):
"""
Tokenize JavaScript/JSX source. Returns a generator of tokens.
:param jsx: Enable (limited) JSX parsing.
"""
may_divide = False
pos = 0
lineno = 1
end = len(source)
rules = get_rules(jsx=jsx)

while pos < end:
# handle regular rules first
Expand Down
25 changes: 25 additions & 0 deletions tests/messages/test_js_extract.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -- encoding: UTF-8 --
import pytest
from babel._compat import BytesIO
from babel.messages import extract

Expand Down Expand Up @@ -97,3 +98,27 @@ def test_misplaced_comments():
assert messages[1][3] == [u'NOTE: this will show up', 'too.']
assert messages[2][2] == u'no comment here'
assert messages[2][3] == []


JSX_SOURCE = b"""
class Foo {
render() {
const value = gettext("hello");
return (
<option value="val1">{ i18n._('String1') }</option>
<option value="val2">{ i18n._('String 2') }</option>
<option value="val3">{ i18n._('String 3') }</option>
);
}
"""
EXPECTED_JSX_MESSAGES = ["hello", "String1", "String 2", "String 3"]


@pytest.mark.parametrize("jsx_enabled", (False, True))
def test_jsx_extraction(jsx_enabled):
    """
    Extract messages from ``JSX_SOURCE`` with the ``jsx`` option on and off.

    With the option enabled the extracted message strings must equal
    ``EXPECTED_JSX_MESSAGES``; with it disabled they must differ from it.
    """
    source = BytesIO(JSX_SOURCE)
    extracted = extract.extract_javascript(
        source, ('_', 'gettext'), [], {"jsx": jsx_enabled}
    )
    # Index 2 of each extracted entry is the message string.
    messages = [entry[2] for entry in extracted]

    if not jsx_enabled:
        assert messages != EXPECTED_JSX_MESSAGES
        return
    assert messages == EXPECTED_JSX_MESSAGES

0 comments on commit 8fee76c

Please sign in to comment.