From 28e7b150520dfb3e2484b2b4d1ca11fe6052a21c Mon Sep 17 00:00:00 2001 From: Rik Date: Thu, 11 Feb 2021 19:39:06 -0500 Subject: [PATCH 1/6] Improved javascript template string expression extracting --- babel/messages/extract.py | 53 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/babel/messages/extract.py b/babel/messages/extract.py index 64497762c..f9c2c95ff 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -17,6 +17,7 @@ :license: BSD, see LICENSE for more details. """ +import io import os from os.path import relpath import sys @@ -521,8 +522,10 @@ def extract_javascript(fileobj, keywords, comment_tags, options): :param options: a dictionary of additional options (optional) Supported options are: * `jsx` -- set to false to disable JSX/E4X support. - * `template_string` -- set to false to disable ES6 - template string support. + * `template_string` -- if `True`, supports gettext(`key`) + * `parse_template_string` -- if `True` will parse the + contents of javascript + template strings. """ from babel.messages.jslexer import Token, tokenize, unquote_string funcname = message_lineno = None @@ -551,7 +554,11 @@ def extract_javascript(fileobj, keywords, comment_tags, options): call_stack = 0 token = Token('operator', ')', token.lineno) - if token.type == 'operator' and token.value == '(': + if options.get('parse_template_string') and not funcname and token.type == 'template_string': + for item in parse_template_string(token.value, fileobj, keywords, comment_tags, options): + yield item + + elif token.type == 'operator' and token.value == '(': if funcname: message_lineno = token.lineno call_stack += 1 @@ -643,3 +650,43 @@ def extract_javascript(fileobj, keywords, comment_tags, options): funcname = token.value last_token = token + + +def parse_template_string(template_string, fileobj, keywords, comment_tags, options): + + prev_character = None + level = 0 + inside_str = False + expression_contents = '' + + for character in template_string[1:-1]: + + if not inside_str and character in ('"', "'", '`'): + inside_str = character + elif inside_str == character and prev_character != r'\\': + inside_str = False + + if level: + expression_contents += character + + if not inside_str: + + if character == '{' and prev_character == '$': + level += 1 + + elif level and character == '}': + + level -= 1 + + if level == 0 and expression_contents: + + expression_contents = expression_contents[0:-1] + + fake_file_obj = io.BytesIO(expression_contents.encode()) + + for item in extract_javascript(fake_file_obj, keywords, comment_tags, options): + yield item + + expression_contents = '' + + prev_character = character From f4f2bba47b15ebca2094a29de7823b24cc557251 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Thu, 29 Dec 2022 15:15:12 +0100 Subject: [PATCH 2/6] handle line numbers + add tests for JS template strings --- babel/messages/extract.py | 39 +++++++++++++++---------------- babel/messages/jslexer.py | 3 +-- tests/messages/test_js_extract.py | 39 +++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 22 deletions(-) diff --git a/babel/messages/extract.py b/babel/messages/extract.py index a934c9161..3f726146e 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -18,8 +18,8 @@ import ast import io import os -from os.path import relpath import sys +from os.path import relpath from tokenize import generate_tokens, COMMENT, NAME, OP, STRING from babel.util import parse_encoding, parse_future_flags, pathmatch @@ -533,7 +533,7 @@ def _parse_python_string(value, encoding, future_flags): return None -def extract_javascript(fileobj, keywords, comment_tags, options): +def extract_javascript(fileobj, keywords, comment_tags, options, lineno=1): """Extract messages from JavaScript source code. :param fileobj: the seekable, file-like object the messages should be @@ -549,6 +549,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options): * `parse_template_string` -- if `True` will parse the contents of javascript template strings. + :param lineno: the line number to start from (optional) """ from babel.messages.jslexer import Token, tokenize, unquote_string funcname = message_lineno = None @@ -560,12 +561,12 @@ def extract_javascript(fileobj, keywords, comment_tags, options): last_token = None call_stack = -1 dotted = any('.' in kw for kw in keywords) - for token in tokenize( fileobj.read().decode(encoding), jsx=options.get("jsx", True), template_string=options.get("template_string", True), - dotted=dotted + dotted=dotted, + lineno=lineno ): if ( # Turn keyword`foo` expressions into keyword("foo") calls: funcname and # have a keyword... @@ -578,7 +579,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options): token = Token('operator', ')', token.lineno) if options.get('parse_template_string') and not funcname and token.type == 'template_string': - for item in parse_template_string(token.value, fileobj, keywords, comment_tags, options): + for item in parse_template_string(token.value, keywords, comment_tags, options, token.lineno): yield item elif token.type == 'operator' and token.value == '(': @@ -675,41 +676,39 @@ def extract_javascript(fileobj, keywords, comment_tags, options): last_token = token -def parse_template_string(template_string, fileobj, keywords, comment_tags, options): +def parse_template_string(template_string, keywords, comment_tags, options, lineno): + """Parse JavaScript template string. + :param template_string: the template string to be parsed + :param keywords: a list of keywords (i.e. function names) that should be + recognized as translation functions + :param comment_tags: a list of translator tags to search for and include + in the results + :param options: a dictionary of additional options (optional) + :param lineno: the line number to start from + """ + from babel.messages.jslexer import line_re prev_character = None level = 0 inside_str = False expression_contents = '' - for character in template_string[1:-1]: - if not inside_str and character in ('"', "'", '`'): inside_str = character elif inside_str == character and prev_character != r'\\': inside_str = False - if level: expression_contents += character - if not inside_str: - if character == '{' and prev_character == '$': level += 1 - elif level and character == '}': - level -= 1 - if level == 0 and expression_contents: - expression_contents = expression_contents[0:-1] - fake_file_obj = io.BytesIO(expression_contents.encode()) - - for item in extract_javascript(fake_file_obj, keywords, comment_tags, options): + for item in extract_javascript(fake_file_obj, keywords, comment_tags, options, lineno): yield item - + lineno += len(line_re.findall(expression_contents)) expression_contents = '' - prev_character = character diff --git a/babel/messages/jslexer.py b/babel/messages/jslexer.py index 1264b2dbc..0a5366460 100644 --- a/babel/messages/jslexer.py +++ b/babel/messages/jslexer.py @@ -151,7 +151,7 @@ def unquote_string(string): return u''.join(result) -def tokenize(source, jsx=True, dotted=True, template_string=True): +def tokenize(source, jsx=True, dotted=True, template_string=True, lineno=1): """ Tokenize JavaScript/JSX source. Returns a generator of tokens. @@ -161,7 +161,6 @@ def tokenize(source, jsx=True, dotted=True, template_string=True): """ may_divide = False pos = 0 - lineno = 1 end = len(source) rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string) diff --git a/tests/messages/test_js_extract.py b/tests/messages/test_js_extract.py index 72c521144..95985c0f7 100644 --- a/tests/messages/test_js_extract.py +++ b/tests/messages/test_js_extract.py @@ -150,3 +150,42 @@ def test_template_string_tag_usage(): ) assert messages == [(1, 'Tag template, wow', [], None)] + + +def test_inside_template_string(): + buf = BytesIO(b"const msg = `${gettext('Hello')} ${user.name}`") + messages = list( + extract.extract('javascript', buf, {"gettext": None}, [], {'parse_template_string': True}) + ) + + assert messages == [(1, 'Hello', [], None)] + + +def test_inside_template_string_with_linebreaks(): + buf = BytesIO(b"""\ +const userName = gettext('Username') +const msg = `${ +gettext('Hello') +} ${userName} ${ +gettext('Are you having a nice day?') +}` +const msg2 = `${ +gettext('Howdy') +} ${userName} ${ +gettext('Are you doing ok?') +}` +""") + messages = list( + extract.extract('javascript', buf, {"gettext": None}, [], {'parse_template_string': True}) + ) + + assert messages == [(1, 'Username', [], None), (3, 'Hello', [], None), (5, 'Are you having a nice day?', [], None), (8, 'Howdy', [], None), (10, 'Are you doing ok?', [], None)] + + +def test_inside_nested_template_string(): + buf = BytesIO(b"const msg = `${gettext('Greetings!')} ${ evening ? `${user.name}: ${gettext('This is a lovely evening.')}` : `${gettext('The day is really nice!')} ${user.name}`}`") + messages = list( + extract.extract('javascript', buf, {"gettext": None}, [], {'parse_template_string': True}) + ) + + assert messages == [(1, 'Greetings!', [], None), (1, 'This is a lovely evening.', [], None), (1, 'The day is really nice!', [], None)] From b67464525da45c9b5a53ebb76f4af5b99ad9c3e1 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Fri, 6 Jan 2023 20:07:25 +0100 Subject: [PATCH 3/6] Update babel/messages/extract.py Co-authored-by: Aarni Koskela --- babel/messages/extract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/babel/messages/extract.py b/babel/messages/extract.py index 3f726146e..36a69330c 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -685,7 +685,7 @@ def parse_template_string(template_string, keywords, comment_tags, options, line :param comment_tags: a list of translator tags to search for and include in the results :param options: a dictionary of additional options (optional) - :param lineno: the line number to start from + :param lineno: starting line number (optional) """ from babel.messages.jslexer import line_re prev_character = None From e2da5f7bb88d89468e3215208c6e11b56b91fff1 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Fri, 6 Jan 2023 20:07:37 +0100 Subject: [PATCH 4/6] Update babel/messages/extract.py Co-authored-by: Aarni Koskela --- babel/messages/extract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/babel/messages/extract.py b/babel/messages/extract.py index 36a69330c..adbc9bfdb 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -549,7 +549,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options, lineno=1): * `parse_template_string` -- if `True` will parse the contents of javascript template strings. - :param lineno: the line number to start from (optional) + :param lineno: line number offset (for parsing embedded fragments) """ from babel.messages.jslexer import Token, tokenize, unquote_string funcname = message_lineno = None From bedf5bec6fee99fe12b726bc647ab22e36e82fd9 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Fri, 6 Jan 2023 20:10:35 +0100 Subject: [PATCH 5/6] Update babel/messages/extract.py Co-authored-by: Aarni Koskela --- babel/messages/extract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/babel/messages/extract.py b/babel/messages/extract.py index adbc9bfdb..c19dd5af2 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -676,7 +676,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options, lineno=1): last_token = token -def parse_template_string(template_string, keywords, comment_tags, options, lineno): +def parse_template_string(template_string, keywords, comment_tags, options, lineno=1): """Parse JavaScript template string. :param template_string: the template string to be parsed From 37ec294471a2d67a6266a8f380e5b4797917ef50 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Fri, 6 Jan 2023 20:13:03 +0100 Subject: [PATCH 6/6] Update babel/messages/jslexer.py according to request --- babel/messages/jslexer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/babel/messages/jslexer.py b/babel/messages/jslexer.py index 0a5366460..886f69d20 100644 --- a/babel/messages/jslexer.py +++ b/babel/messages/jslexer.py @@ -158,6 +158,7 @@ def tokenize(source, jsx=True, dotted=True, template_string=True, lineno=1): :param jsx: Enable (limited) JSX parsing. :param dotted: Read dotted names as single name token. :param template_string: Support ES6 template strings + :param lineno: starting line number (optional) """ may_divide = False pos = 0