From d82029c3ce544dc5ec81024c953cdd4cf6fe2816 Mon Sep 17 00:00:00 2001 From: Stephen Touset Date: Fri, 23 Oct 2020 16:15:48 -0700 Subject: [PATCH 1/6] Support `:rept` expressions in PEG --- lib/ebnf/peg/rule.rb | 9 +++++++++ spec/peg/rule_spec.rb | 30 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb index 0934bb8..115ba51 100644 --- a/lib/ebnf/peg/rule.rb +++ b/lib/ebnf/peg/rule.rb @@ -24,6 +24,7 @@ module Rule # * `opt`: returns the value matched, or `nil` if unmatched. # * `plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string. # * `range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched. + # * `rept`: returns an array of the values matched for the specified production, or `:unmatched`, if fewer than `min` are matched. For Terminals, these are concatenated into a single string. # * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via option in a `production` or definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence. # * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string. 
# @@ -142,6 +143,14 @@ def parse(input) parser.update_furthest_failure(input.pos, input.lineno, expr[1]) :unmatched end + when :rept + # Result is an array of all expressions while they match (at most max), + # or :unmatched if fewer than min match + rept = rept(input, expr[1], expr[2], expr[3]) + + # Update furthest failure for strings and terminals + parser.update_furthest_failure(input.pos, input.lineno, expr[3]) if terminal? + rept.is_a?(Array) && terminal? ? rept.join("") : rept when :seq # Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering. seq = expr[1..-1].each_with_object([]) do |prod, accumulator| diff --git a/spec/peg/rule_spec.rb b/spec/peg/rule_spec.rb index e3838c1..783f887 100644 --- a/spec/peg/rule_spec.rb +++ b/spec/peg/rule_spec.rb @@ -79,6 +79,21 @@ input: " A A A ", expect: %w(A A A) }, + "(rept 1 2 A) with ' A A A '" => { + rule: [:rept, 1, 2, "A"], + input: " A A A ", + expect: %w(A A) + }, + "(rept 1 4 A) with ' A A A '" => { + rule: [:rept, 1, 4, "A"], + input: " A A A ", + expect: %w(A A A) + }, + "(rept 4 10 A) with ' A A A '" => { + rule: [:rept, 4, 10, "A"], + input: " A A A ", + expect: :unmatched + }, "(seq 'A' 'B')" => { rule: [:seq, "A", "B"], input: "A B", @@ -213,6 +228,21 @@ input: " A A A ", expect: %w(A A A) }, + "(rept 1 2 A) with ' A A A '" => { + rule: [:rept, 1, 2, "A"], + input: " A A A ", + expect: %w(A A) + }, + "(rept 1 4 A) with ' A A A '" => { + rule: [:rept, 1, 4, "A"], + input: " A A A ", + expect: %w(A A A) + }, + "(rept 4 10 A) with ' A A A '" => { + rule: [:rept, 4, 10, "A"], + input: " A A A ", + expect: :unmatched + }, "(seq 'A' 'B')" => { rule: [:seq, "A", "B"], input: "A B", From c2d98cfb0b21a941c4da9038c0a48afc183c638e Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 4 Oct 2020 17:18:10 -0700 
Subject: [PATCH 2/6] Minor HTML format updates. --- lib/ebnf/writer.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index 1033a52..8b5b66b 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -133,7 +133,10 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) OpenStruct.new(id: ("@#{rule.kind}"), sym: nil, assign: nil, - formatted: ("# Productions for terminals" if rule.kind == :terminals)) + formatted: ( + rule.kind == :terminals ? + "# Productions for terminals" : + self.send(format_meth, rule.expr))) else formatted_expr = self.send(format_meth, rule.expr) # Measure text without markup @@ -685,9 +688,11 @@ def format_isoebnf_range(string) <% for rule in @rules %> > <% if rule.id %> - <%= rule.id %> + ><%= rule.id %> <% end %> + <% if rule.sym %> <%== rule.sym %> + <% end %> <%= rule.assign %> <%= rule.formatted %> From 05453298ac56c7dc77487d799b5429a9a42a6944 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 24 Oct 2020 17:02:53 -0700 Subject: [PATCH 3/6] Use native parser for builds and tests so that changes to the PEG parser don't spread too far. 
--- Rakefile | 18 +++++++++--------- spec/base_spec.rb | 6 +++--- spec/bnf_spec.rb | 2 +- spec/peg_spec.rb | 2 +- spec/rule_spec.rb | 2 +- spec/spec_helper.rb | 2 +- spec/writer_spec.rb | 8 ++++---- 7 files changed, 20 insertions(+), 20 deletions(-) mode change 100755 => 100644 Rakefile diff --git a/Rakefile b/Rakefile old mode 100755 new mode 100644 index 63b905b..770ce79 --- a/Rakefile +++ b/Rakefile @@ -60,11 +60,11 @@ desc "Build meta files for ABNF, EBNF and ISO EBNF" task :meta => %w{lib/ebnf/ebnf/meta.rb lib/ebnf/isoebnf/meta.rb lib/ebnf/abnf/meta.rb lib/ebnf/abnf/core.rb} file "lib/ebnf/abnf/meta.rb" => "etc/abnf.ebnf" do - %x(bin/ebnf --peg -f rb --mod-name ABNFMeta -o lib/ebnf/abnf/meta.rb etc/abnf.ebnf) + %x(bin/ebnf --input-format native --peg -f rb --mod-name ABNFMeta -o lib/ebnf/abnf/meta.rb etc/abnf.ebnf) end file "lib/ebnf/abnf/core.rb" => "etc/abnf-core.ebnf" do - %x(bin/ebnf -f rb --mod-name ABNFCore -o lib/ebnf/abnf/core.rb etc/abnf-core.ebnf) + %x(bin/ebnf --input-format native -f rb --mod-name ABNFCore -o lib/ebnf/abnf/core.rb etc/abnf-core.ebnf) end file "lib/ebnf/ebnf/meta.rb" => "etc/ebnf.peg.rb" do @@ -72,7 +72,7 @@ file "lib/ebnf/ebnf/meta.rb" => "etc/ebnf.peg.rb" do end file "lib/ebnf/isoebnf/meta.rb" => "etc/iso-ebnf.ebnf" do - %x(bin/ebnf --peg -f rb --mod-name ISOEBNFMeta -o lib/ebnf/isoebnf/meta.rb etc/iso-ebnf.ebnf) + %x(bin/ebnf --input-format native --peg -f rb --mod-name ISOEBNFMeta -o lib/ebnf/isoebnf/meta.rb etc/iso-ebnf.ebnf) end @@ -80,7 +80,7 @@ end rule ".sxp" => %w{.ebnf} do |t| puts "build #{t.name}" File.open(t.name, "w") do |f| - IO.popen(%(bin/ebnf #{t.source})).each_line do |line| + IO.popen(%(bin/ebnf --input-format native #{t.source})).each_line do |line| f.puts ' ' + line end end @@ -89,7 +89,7 @@ end rule ".peg.sxp" => %w{.ebnf} do |t| puts "build #{t.name}" File.open(t.name, "w") do |f| - IO.popen(%(bin/ebnf --peg #{t.source})).each_line do |line| + IO.popen(%(bin/ebnf --input-format native --peg 
#{t.source})).each_line do |line| f.puts ' ' + line end end @@ -97,13 +97,13 @@ end rule ".html" => %w{.ebnf} do |t| puts "build #{t.name}" - %x(bin/ebnf --format html -o #{t.name} #{t.source}) + %x(bin/ebnf --input-format native --format html -o #{t.name} #{t.source}) end file "etc/ebnf.ll1.sxp" => "etc/ebnf.ebnf" do |t| puts "build #{t.name}" File.open(t.name, "w") do |f| - IO.popen(%(bin/ebnf --ll1 ebnf #{t.source})).each_line do |line| + IO.popen(%(bin/ebnf --input-format native --ll1 ebnf #{t.source})).each_line do |line| f.puts ' ' + line end end @@ -111,10 +111,10 @@ end file "etc/ebnf.peg.rb" => "etc/ebnf.ebnf" do |t| puts "build #{t.name}" - %x(bin/ebnf --peg --mod-name EBNFMeta -f rb -o etc/ebnf.peg.rb etc/ebnf.ebnf) + %x(bin/ebnf --input-format native --peg --mod-name EBNFMeta -f rb -o etc/ebnf.peg.rb etc/ebnf.ebnf) end file "etc/ebnf.ll1.rb" => "etc/ebnf.ebnf" do |t| puts "build #{t.name}" - %x(bin/ebnf --ll1 ebnf -f rb -o etc/ebnf.ll1.rb etc/ebnf.ebnf) + %x(bin/ebnf --input-format native --ll1 ebnf -f rb -o etc/ebnf.ll1.rb etc/ebnf.ebnf) end diff --git a/spec/base_spec.rb b/spec/base_spec.rb index 7510b48..44c5c17 100644 --- a/spec/base_spec.rb +++ b/spec/base_spec.rb @@ -83,7 +83,7 @@ end describe "#validate!" do - let(:simple) {EBNF.parse("a ::= b")} + let(:simple) {EBNF.parse("a ::= b", format: :native)} it "notes invalid grammar" do expect do expect {simple.validate!}.to raise_error SyntaxError, "In rule a: No rule found for b" @@ -96,7 +96,7 @@ end describe "#valid?" 
do - let(:simple) {EBNF.parse("a ::= b")} + let(:simple) {EBNF.parse("a ::= b", format: :native)} it "notes invalid grammar" do expect do expect(simple.valid?).to be_falsey @@ -152,7 +152,7 @@ def parse(value, **options) @debug = [] - options = {debug: @debug}.merge(options) + options = {debug: @debug, format: :native}.merge(options) EBNF::Base.new(value, **options) end end diff --git a/spec/bnf_spec.rb b/spec/bnf_spec.rb index 137cfbb..f9278d1 100644 --- a/spec/bnf_spec.rb +++ b/spec/bnf_spec.rb @@ -69,7 +69,7 @@ def parse(value, **options) @debug = [] - options = {debug: @debug}.merge(options) + options = {debug: @debug, format: :native}.merge(options) EBNF::Base.new(value, **options) end end diff --git a/spec/peg_spec.rb b/spec/peg_spec.rb index a354203..5d06e47 100644 --- a/spec/peg_spec.rb +++ b/spec/peg_spec.rb @@ -54,7 +54,7 @@ def parse(value, **options) @debug = [] - options = {debug: @debug}.merge(options) + options = {debug: @debug, format: :native}.merge(options) EBNF::Base.new(value, **options) end end diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index 36b2d5e..bffb777 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -6,7 +6,7 @@ describe EBNF::Rule do let(:debug) {[]} - let(:ebnf) {EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__)))} + let(:ebnf) {EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__)), format: :native)} subject {EBNF::Rule.new(:rule, "0", [:seq, :foo])} describe ".from_sxp" do diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 10837a5..e1a4b1a 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -38,4 +38,4 @@ require 'ebnf' -PARSED_EBNF_GRAMMAR = EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__))).freeze \ No newline at end of file +PARSED_EBNF_GRAMMAR = EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__)), format: :native).freeze \ No newline at end of file diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index 
c14a230..1798fd8 100644 --- a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -47,7 +47,7 @@ ], }.each do |title, (grammar, plain)| context title do - subject {EBNF::Base.new(grammar).ast} + subject {EBNF::Base.new(grammar, format: :native).ast} it "generates plain" do expect(EBNF::Writer.string(*subject)).to eq plain @@ -64,7 +64,7 @@ ], }.each do |title, (grammar, plain)| context title do - subject {EBNF::Base.new(grammar).ast} + subject {EBNF::Base.new(grammar, format: :native).ast} it "generates plain" do expect {EBNF::Writer.print(*subject)}.to write(plain).to(:output) @@ -89,7 +89,7 @@ ], }.each do |title, (grammar, xpaths)| context title do - subject {EBNF::Writer.html(*EBNF::Base.new(grammar).ast)} + subject {EBNF::Writer.html(*EBNF::Base.new(grammar, format: :native).ast)} xpaths.each do |path, value| specify {is_expected.to have_xpath(path, value)} end @@ -106,7 +106,7 @@ ], }.each do |title, (grammar, plain)| context title do - subject {EBNF::Base.new(grammar).ast} + subject {EBNF::Base.new(grammar, format: :native).ast} it "generates plain" do expect {EBNF::Writer.new(subject)}.to write(plain).to(:output) From 6f8723c77c364fa96721bff93531dad5b3d8e06c Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 24 Oct 2020 17:11:57 -0700 Subject: [PATCH 4/6] Note ambiguity inherent in using rule identifiers which conflict with ranges. For #8. --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 461c426..12eda9b 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,8 @@ On a parsing failure, and exception is raised with information that may be usefu The [EBNF][] variant used here is based on [W3C](https://w3.org/) [EBNF][] (see {file:etc/ebnf.ebnf EBNF grammar}) as defined in the [XML 1.0 recommendation](https://www.w3.org/TR/REC-xml/), with minor extensions: +Note that the grammar includes an optional `[identifier]` in front of rule names, which can be in conflict with the `RANGE` terminal. 
It is typically not a problem, but if it comes up, try parsing with the `native` parser, add comments or sequences to disambiguate. EBNF does not have beginning of line checks as all whitespace is treated the same, so the common practice of identifying each rule inherently leads to such ambiguity. + The character set for EBNF is UTF-8. The general form of a rule is: From 6d89a0244f97c269cb74e3614efc1049daca5dde Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 25 Oct 2020 16:38:33 -0700 Subject: [PATCH 5/6] Update PDD info in the README. --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 12eda9b..8a02e3a 100644 --- a/README.md +++ b/README.md @@ -261,7 +261,8 @@ This repository uses [Git Flow](https://github.com/nvie/gitflow) to mange develo list in the the `README`. Alphabetical order applies. * Do note that in order for us to merge any non-trivial changes (as a rule of thumb, additions larger than about 15 lines of code), we need an - explicit [public domain dedication][PDD] on record from you. + explicit [public domain dedication][PDD] on record from you, + which you will be asked to agree to on the first commit to a repo within the organization. ## License This is free and unencumbered public domain software. For more information, From 084fe89a37533cd898db5c8f5a6466fa9be7925b Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 25 Oct 2020 16:53:33 -0700 Subject: [PATCH 6/6] Version 2.1.2. --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 3e3c2f1..eca07e4 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1.1 +2.1.2