From 782d1899fa4409b78fd8261e467728ccb215b9f9 Mon Sep 17 00:00:00 2001 From: Jason Barnabe Date: Fri, 27 Jun 2014 22:00:20 -0500 Subject: [PATCH] Update JS idents to match ECMA 5.1 --- lib/coderay/scanners/java_script.rb | 3 ++- test/functional/basic.rb | 37 +++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/lib/coderay/scanners/java_script.rb b/lib/coderay/scanners/java_script.rb index 9eb0a0a1..4a5d4a2f 100644 --- a/lib/coderay/scanners/java_script.rb +++ b/lib/coderay/scanners/java_script.rb @@ -113,7 +113,8 @@ def scan_tokens encoder, options function_expected = key_expected = value_expected = false encoder.text_token match, :operator - elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x) + # Follows http://es5.github.io/x7.html#x7.6, but does not honor "A UnicodeEscapeSequence cannot be used to put a character into an IdentifierName that would otherwise be illegal." + elsif match = scan(/ ([\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}$_]|\\u[0-9a-fA-F]{4})([\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}$_\p{Mn}\p{Mc}\p{Nd}\p{Pc}\u200C\u200D]|\\u[0-9a-fA-F]{4})* /x) kind = IDENT_KIND[match] value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match] # TODO: labels diff --git a/test/functional/basic.rb b/test/functional/basic.rb index 752d4ba0..9c684656 100755 --- a/test/functional/basic.rb +++ b/test/functional/basic.rb @@ -314,5 +314,42 @@ def test_scan_a_non_string CodeRay.highlight CodeRay, :plain end end + + JS_UNICODE_IDENT_TEST_CODE = 'var 動 = 1;' + JS_UNICODE_IDENT_TEST_TOKENS = [ + ['var', :keyword], + [' ', :space], + ['動', :ident], + [' ', :space], + ['=', :operator], + [' ', :space], + ["1", :integer], + [";", :operator] + ].flatten + def test_js_scan_unicode_ident_token + assert_nothing_raised do + assert_equal JS_UNICODE_IDENT_TEST_TOKENS, CodeRay.scan(JS_UNICODE_IDENT_TEST_CODE, :java_script).tokens + end + end + + # Actual JS variable name is \u1212 here, extra backslash is to escape Ruby + 
JS_ESCAPED_UNICODE_IDENT_TEST_CODE = 'var \\u1212 = 1;' + + JS_ESCAPED_UNICODE_IDENT_TEST_TOKENS = [ + ['var', :keyword], + [' ', :space], + ['\\u1212', :ident], + [' ', :space], + ['=', :operator], + [' ', :space], + ["1", :integer], + [";", :operator] + ].flatten + def test_js_scan_escaped_unicode_ident_token + assert_nothing_raised do + assert_equal JS_ESCAPED_UNICODE_IDENT_TEST_TOKENS, CodeRay.scan(JS_ESCAPED_UNICODE_IDENT_TEST_CODE, :java_script).tokens + end + end + end