From 72af41be55bc9c51075f3db50262ea2ee1a457c9 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Wed, 30 Dec 2015 19:36:47 +0200 Subject: [PATCH] JavaScript extractor: Add rudimentary JSX/E4X support Fixes #280 --- babel/messages/extract.py | 4 +++- babel/messages/jslexer.py | 10 ++++++++-- tests/messages/test_js_extract.py | 25 +++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/babel/messages/extract.py b/babel/messages/extract.py index 8fe3f606c..f8495b02b 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -461,6 +461,8 @@ def extract_javascript(fileobj, keywords, comment_tags, options): :param comment_tags: a list of translator tags to search for and include in the results :param options: a dictionary of additional options (optional) + Supported options are: + * `jsx` -- set to false to disable JSX/E4X support. """ from babel.messages.jslexer import tokenize, unquote_string funcname = message_lineno = None @@ -472,7 +474,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options): last_token = None call_stack = -1 - for token in tokenize(fileobj.read().decode(encoding)): + for token in tokenize(fileobj.read().decode(encoding), jsx=options.get("jsx", True)): if token.type == 'operator' and token.value == '(': if funcname: message_lineno = token.lineno diff --git a/babel/messages/jslexer.py b/babel/messages/jslexer.py index 22c6e1f9c..c00d8d40b 100644 --- a/babel/messages/jslexer.py +++ b/babel/messages/jslexer.py @@ -36,6 +36,7 @@ ([eE][-+]?\d+)? | (0x[a-fA-F0-9]+) )''')), + ('jsx_tag', re.compile(r'<(?:/?)\w+.+?>', re.I)), ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))), ('string', re.compile(r'''(?xs)( '(?:[^'\\]*(?:\\.[^'\\]*)*)' | @@ -127,8 +128,11 @@ def unquote_string(string): return u''.join(result) -def tokenize(source): - """Tokenize a JavaScript source. Returns a generator of tokens. +def tokenize(source, jsx=True): + """ + Tokenize JavaScript/JSX source. Returns a generator of tokens. + + :param jsx: Enable (limited) JSX parsing. """ may_divide = False pos = 0 @@ -138,6 +142,8 @@ def tokenize(source): while pos < end: # handle regular rules first for token_type, rule in rules: + if not jsx and token_type and 'jsx' in token_type: + continue match = rule.match(source, pos) if match is not None: break diff --git a/tests/messages/test_js_extract.py b/tests/messages/test_js_extract.py index 299240e21..ae6d277b3 100644 --- a/tests/messages/test_js_extract.py +++ b/tests/messages/test_js_extract.py @@ -1,4 +1,5 @@ # -- encoding: UTF-8 -- +import pytest from babel._compat import BytesIO from babel.messages import extract @@ -97,3 +98,27 @@ def test_misplaced_comments(): assert messages[1][3] == [u'NOTE: this will show up', 'too.'] assert messages[2][2] == u'no comment here' assert messages[2][3] == [] + + +JSX_SOURCE = b""" +class Foo { + render() { + const value = gettext("hello"); + return ( + + + + ); + } +""" +EXPECTED_JSX_MESSAGES = ["hello", "String1", "String 2", "String 3"] + + +@pytest.mark.parametrize("jsx_enabled", (False, True)) +def test_jsx_extraction(jsx_enabled): + buf = BytesIO(JSX_SOURCE) + messages = [m[2] for m in extract.extract_javascript(buf, ('_', 'gettext'), [], {"jsx": jsx_enabled})] + if jsx_enabled: + assert messages == EXPECTED_JSX_MESSAGES + else: + assert messages != EXPECTED_JSX_MESSAGES