From a67642f0070b87b7a9ba442a6f46795580217ae9 Mon Sep 17 00:00:00 2001 From: Aaron Lasseigne Date: Tue, 22 Feb 2022 19:37:44 -0600 Subject: [PATCH] escape troublesome characters in regexp expressions Found this issue while parsing the ABNF JSON string representation. These two rules caused parsing issues because of the characters generated within the regular expressions that they result in: escape = %x5C ; \ unescaped = %x20-21 / %x23-5B / %x5D-10FFFF ; [ -!] / [#-[] / []-\u{10FFFF}] --- lib/ebnf/rule.rb | 12 +++++++++--- spec/rule_spec.rb | 6 +++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/ebnf/rule.rb b/lib/ebnf/rule.rb index 39625e6..37fb189 100644 --- a/lib/ebnf/rule.rb +++ b/lib/ebnf/rule.rb @@ -367,11 +367,11 @@ def to_peg def to_regexp case expr.first when :hex - Regexp.new(translate_codepoints(expr[1])) + Regexp.new(Regexp.escape(translate_codepoints(expr[1]))) when :istr /#{expr.last}/ui when :range - Regexp.new("[#{translate_codepoints(expr[1])}]") + Regexp.new("[#{escape_regexp_character_range(translate_codepoints(expr[1]))}]") else raise "Can't turn #{expr.inspect} into a regexp" end @@ -770,5 +770,11 @@ def make_sym_id(variation = nil) @id_seq += 1 ["_#{@sym}_#{@id_seq}#{variation}".to_sym, ("#{@id}.#{@id_seq}#{variation}" if @id)] end + + # Escape "[", "]", and "\" in ranges so they don't result in a warning or error + # about empty character classes. 
+ def escape_regexp_character_range(character_range) + character_range.gsub(/([\[\]\\])/) {|char| "\\#{char}"} + end end -end \ No newline at end of file +end diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index bffb777..a11f56a 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -481,9 +481,13 @@ describe "#to_regexp" do { hex: [:hex, "#x20", / /], + hex: [:hex, "#x5c", /\\/], range: [:range, "a-b", /[a-b]/], range2: [:range, "a-zA-Z", /[a-zA-Z]/], range3: [:range, "abc-", /[abc-]/], + range4: [:range, "#x23-#x5b", /[#-\[]/], + range5: [:range, "#x5d-#x5e", /[\]-^]/], + range6: [:range, "#x5c-#x5e", /[\\-^]/], }.each do |title, (op, exp, regexp)| it title do expect(EBNF::Rule.new(title, nil, [op, exp]).to_regexp).to eql regexp @@ -1055,4 +1059,4 @@ end end end -end \ No newline at end of file +end