diff --git a/.travis.yml b/.travis.yml index 7e1193e..46eb519 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,9 @@ language: ruby script: "bundle exec rspec spec" env: - - NOKOGIRI_USE_SYSTEM_LIBRARIES=true - CI=true + global: + - NOKOGIRI_USE_SYSTEM_LIBRARIES=true rvm: - 2.4 - 2.5 diff --git a/VERSION b/VERSION index 7ec1d6d..3e3c2f1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1.0 +2.1.1 diff --git a/ebnf.gemspec b/ebnf.gemspec index 10cff8b..31dd18d 100755 --- a/ebnf.gemspec +++ b/ebnf.gemspec @@ -9,7 +9,7 @@ Gem::Specification.new do |gem| gem.homepage = "https://github.com/dryruby/ebnf" gem.license = 'Unlicense' gem.summary = "EBNF parser and parser generator." - gem.description = %q{EBNF is a Ruby parser for W3C EBNF and a parser generator for compliant LL(1) grammars.} + gem.description = %q{EBNF is a Ruby parser for W3C EBNF and a parser generator for PEG and LL(1). Also includes parsing modes for ISO EBNF and ABNF.} gem.authors = ['Gregg Kellogg'] gem.email = 'public-rdf-ruby@w3.org' @@ -27,10 +27,11 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency 'sxp', '~> 1.1' gem.add_runtime_dependency 'scanf', '~> 1.0' gem.add_runtime_dependency 'rdf', '~> 3.1' # Required by sxp + gem.add_runtime_dependency 'htmlentities', '~> 4.3' gem.add_development_dependency 'rdf-spec', '~> 3.1' gem.add_development_dependency 'rdf-turtle', '~> 3.1' - gem.add_development_dependency 'erubis', '~> 2.7' gem.add_development_dependency 'nokogiri', '~> 1.10' + gem.add_development_dependency 'erubis', '~> 2.7' gem.add_development_dependency 'rspec', '~> 3.9' gem.add_development_dependency 'rspec-its', '~> 1.3' gem.add_development_dependency 'yard', '~> 0.9' diff --git a/etc/doap.ttl b/etc/doap.ttl index 987b4f9..5a4f5c6 100644 --- a/etc/doap.ttl +++ b/etc/doap.ttl @@ -13,10 +13,17 @@ doap:homepage ; doap:license ; doap:shortdesc "EBNF parser and parser generator"@en ; - doap:description "EBNF is a Ruby parser for W3C EBNF and a parser generator for compliant LL(1) grammars."@en ; + doap:description "EBNF is a Ruby parser for W3C EBNF and a parser generator for PEG and LL(1). Also includes parsing modes for ISO EBNF and ABNF."@en ; doap:created "2011-08-29"^^xsd:date ; doap:programming-language "Ruby" ; - doap:implements ; + doap:implements , + , + , + , + , + , + , + ; doap:category , ; doap:download-page <> ; @@ -28,6 +35,6 @@ doap:documenter ; foaf:maker ; dc:title "ebnf" ; - dc:description "EBNF is a Ruby parser for W3C EBNF and a parser generator for compliant LL(1) grammars."@en ; + dc:description "EBNF is a Ruby parser for W3C EBNF and a parser generator for PEG and LL(1). Also includes parsing modes for ISO EBNF and ABNF."@en ; dc:date "2011-08-29"^^xsd:date ; dc:creator . diff --git a/lib/ebnf/ll1/scanner.rb b/lib/ebnf/ll1/scanner.rb index 469aab5..a115e3e 100644 --- a/lib/ebnf/ll1/scanner.rb +++ b/lib/ebnf/ll1/scanner.rb @@ -69,7 +69,6 @@ def eos? # @return [String] def rest feed_me - @lineno += 1 if eos? encode_utf8 super end diff --git a/lib/ebnf/peg/parser.rb b/lib/ebnf/peg/parser.rb index 48b7ea7..95f635f 100644 --- a/lib/ebnf/peg/parser.rb +++ b/lib/ebnf/peg/parser.rb @@ -75,8 +75,6 @@ def terminal_regexps; (@terminal_regexps ||= {}); end # @option options [Hash{String => String}] :map ({}) # A mapping from terminals, in lower-case form, to # their canonical value - # @option options [Boolean] :unescape - # Cause strings and codepoints to be unescaped. # @yield [value, prod] # @yieldparam [String] value # The scanned terminal value. @@ -269,7 +267,8 @@ def clear_packrat; @packrat.clear; end # @param [String] message Error string # @param [Hash{Symbol => Object}] options # @option options [URI, #to_s] :production - # @option options [Token] :token + # @option options [Boolean] :raise abort furhter processing + # @option options [Array] :backtrace state where error occured # @see #debug def error(node, message, **options) lineno = options[:lineno] || (scanner.lineno if scanner) @@ -282,7 +281,11 @@ def error(node, message, **options) @recovering = true debug(node, m, level: 3, **options) if options[:raise] || @options[:validate] - raise Error.new(m, lineno: lineno, rest: options[:rest], production: options[:production]) + raise Error.new(m, + lineno: lineno, + rest: options[:rest], + production: options[:production], + backtrace: options[:backtrace]) end end @@ -365,25 +368,27 @@ def onStart(prod) @productions << prod debug("#{prod}(:start)", "", lineno: (scanner.lineno if scanner), - pos: (scanner.pos if scanner), - depth: (depth + 1)) {"#{prod}, pos: #{scanner ? scanner.pos : '?'}, rest: #{scanner ? scanner.rest[0..20].inspect : '?'}"} + pos: (scanner.pos if scanner) + ) do + "#{prod}, pos: #{scanner ? scanner.pos : '?'}, rest: #{scanner ? scanner.rest[0..20].inspect : '?'}" + end if handler # Create a new production data element, potentially allowing handler # to customize before pushing on the @prod_data stack - data = {} + data = {_production: prod} begin self.class.eval_with_binding(self) { handler.call(data, @parse_callback) } rescue ArgumentError, Error => e - error("start", "#{e.class}: #{e.message}", production: prod) + error("start", "#{e.class}: #{e.message}", production: prod, backtrace: e.backtrace) @recovering = false end @prod_data << data elsif self.class.production_handlers[prod] # Make sure we push as many was we pop, even if there is no # explicit start handler - @prod_data << {} + @prod_data << {_production: prod} end return self.class.start_options.fetch(prod, {}) # any options on this production end @@ -397,6 +402,9 @@ def onFinish(result) prod = @productions.last handler, clear_packrat = self.class.production_handlers[prod] data = @prod_data.pop if handler || self.class.start_handlers[prod] + error("finish", + "prod_data production mismatch: expected #{prod.inspect}, got #{data[:_production].inspect}", + production: prod, prod_data: @prod_data) if data && prod != data[:_production] if handler && !@recovering && result != :unmatched # Pop production data element from stack, potentially allowing handler to use it result = begin @@ -404,14 +412,13 @@ def onFinish(result) handler.call(result, data, @parse_callback) } rescue ArgumentError, Error => e - error("finish", "#{e.class}: #{e.message}", production: prod) + error("finish", "#{e.class}: #{e.message}", production: prod, backtrace: e.backtrace) @recovering = false end end - progress("#{prod}(:finish)", "", - depth: (depth + 1), - lineno: (scanner.lineno if scanner), - level: result == :unmatched ? 0 : 1) do + debug("#{prod}(:finish)", "", + lineno: (scanner.lineno if scanner), + level: result == :unmatched ? 0 : 1) do "#{result.inspect}@(#{scanner ? scanner.pos : '?'}), rest: #{scanner ? scanner.rest[0..20].inspect : '?'}" end self.clear_packrat if clear_packrat @@ -433,12 +440,12 @@ def onTerminal(prod, value) handler.call(value, parentProd, @parse_callback) } rescue ArgumentError, Error => e - error("terminal", "#{e.class}: #{e.message}", value: value, production: prod) + error("terminal", "#{e.class}: #{e.message}", value: value, production: prod, backtrace: e.backtrace) @recovering = false end end progress("#{prod}(:terminal)", "", - depth: (depth + 2), + depth: (depth + 1), lineno: (scanner.lineno if scanner), level: value == :unmatched ? 0 : 1) do "#{value.inspect}@(#{scanner ? scanner.pos : '?'})" diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index e64cf62..1033a52 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -8,6 +8,7 @@ module EBNF class Writer LINE_LENGTH = 80 + LINE_LENGTH_HTML = 200 # ASCII escape names ASCII_ESCAPE_NAMES = [ @@ -118,19 +119,21 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) lhs_fmt = "%-#{max_id+2}s " + lhs_fmt lhs_length += max_id + 3 end - rhs_length = LINE_LENGTH - lhs_length + rhs_length = (html ? LINE_LENGTH_HTML : LINE_LENGTH) - lhs_length if html # Output as formatted HTML begin require 'erubis' + require 'htmlentities' + @coder = HTMLEntities.new eruby = Erubis::Eruby.new(ERB_DESC) formatted_rules = rules.map do |rule| if rule.kind == :terminals || rule.kind == :pass OpenStruct.new(id: ("@#{rule.kind}"), sym: nil, assign: nil, - formatted: ("Productions for terminals" if rule.kind == :terminals)) + formatted: ("# Productions for terminals" if rule.kind == :terminals)) else formatted_expr = self.send(format_meth, rule.expr) # Measure text without markup @@ -151,7 +154,7 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) formatted.sub!(%r{\s*\|\s*}, '') (ndx > 0 ? (rule.alt? ? '|' : '') : '=') end - lines << OpenStruct.new(id: ("[#{rule.id}]" if rule.id), + lines << OpenStruct.new(id: ((ndx == 0 ? "[#{rule.id}]" : "") if rule.id), sym: (rule.sym if ndx == 0 || format == :abnf), assign: assign, formatted: formatted) @@ -171,7 +174,7 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) out.write eruby.evaluate(format: format, rules: formatted_rules) return rescue LoadError - $stderr.puts "Generating HTML requires erubis gem to be loaded" + $stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded" end end @@ -216,7 +219,7 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) # Format the expression part of a rule def format_ebnf(expr, sep: nil, embedded: false) - return (@options[:html] ? %(#{expr}) : expr.to_s) if expr.is_a?(Symbol) + return (@options[:html] ? %(#{@coder.encode expr}) : expr.to_s) if expr.is_a?(Symbol) if expr.is_a?(String) return expr.length == 1 ? format_ebnf_char(expr) : @@ -290,10 +293,10 @@ def format_ebnf(expr, sep: nil, embedded: false) # Format a single-character string, prefering hex for non-main ASCII def format_ebnf_char(c) case c.ord - when (0x21) then (@options[:html] ? %("#{c}") : %{"#{c}"}) - when 0x22 then (@options[:html] ? %('"') : %{'"'}) - when (0x23..0x7e) then (@options[:html] ? %("#{c}") : %{"#{c}"}) - when (0x80..0xFFFD) then (@options[:html] ? %("#{c}") : %{"#{c}"}) + when (0x21) then (@options[:html] ? %("#{@coder.encode c}") : %{"#{c}"}) + when 0x22 then (@options[:html] ? %('"') : %{'"'}) + when (0x23..0x7e) then (@options[:html] ? %("#{@coder.encode c}") : %{"#{c}"}) + when (0x80..0xFFFD) then (@options[:html] ? %("#{@coder.encode c}") : %{"#{c}"}) else escape_ebnf_hex(c) end end @@ -308,7 +311,7 @@ def format_ebnf_range(string) while !s.eos? case when s.scan(/\A[!"\u0024-\u007e]+/) - buffer << (@options[:html] ? %(#{s.matched}) : s.matched) + buffer << (@options[:html] ? %(#{@coder.encode s.matched}) : s.matched) when s.scan(/\A#x\h+/) buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8)) else @@ -328,7 +331,8 @@ def format_ebnf_string(string, quote = '"') end end - "#{quote}#{string}#{quote}" + res = "#{quote}#{string}#{quote}" + @options[:html] ? @coder.encode(res) : res end def escape_ebnf_hex(u) @@ -341,11 +345,11 @@ def escape_ebnf_hex(u) char = fmt % u.ord if @options[:html] if u.ord <= 0x20 - char = %(#{char}) + char = %(#{@coder.encode char}) elsif u.ord < 0x7F - char = %(#{char}) + char = %(#{@coder.encode char}) elsif u.ord == 0x7F - char = %(#{char}) + char = %(#{@coder.encode char}) elsif u.ord <= 0xFF char = %(#{char}) else @@ -363,7 +367,7 @@ def escape_ebnf_hex(u) # Format the expression part of a rule def format_abnf(expr, sep: nil, embedded: false, sensitive: true) - return (@options[:html] ? %(#{expr}) : expr.to_s) if expr.is_a?(Symbol) + return (@options[:html] ? %(#{@coder.encode expr}) : expr.to_s) if expr.is_a?(Symbol) if expr.is_a?(String) if expr.length == 1 return format_abnf_char(expr) @@ -380,7 +384,7 @@ def format_abnf(expr, sep: nil, embedded: false, sensitive: true) seq.unshift(:seq) return format_abnf(seq, sep: nil, embedded: false) else - return (@options[:html] ? %("#{'%s' if sensitive}#{expr}") : %(#{'%s' if sensitive}"#{expr}")) + return (@options[:html] ? %("#{'%s' if sensitive}#{@coder.encode expr}") : %(#{'%s' if sensitive}"#{expr}")) end end parts = { @@ -528,11 +532,11 @@ def escape_abnf_hex(u) char = "%x" + (fmt % u.ord) if @options[:html] if u.ord <= 0x20 - char = %(#{char}) + char = %(#{@coder.encode char}) elsif u.ord <= 0x7F - char = %(#{char}) + char = %(#{@coder.encode char}) elsif u.ord == 0x7F - char = %(#{char}) + char = %(#{@coder.encode char}) elsif u.ord <= 0xFF char = %(#{char}) else @@ -550,7 +554,7 @@ def escape_abnf_hex(u) # Format the expression part of a rule def format_isoebnf(expr, sep: nil, embedded: false) - return (@options[:html] ? %(#{expr}) : expr.to_s) if expr.is_a?(Symbol) + return (@options[:html] ? %(#{@coder.encode expr}) : expr.to_s) if expr.is_a?(Symbol) if expr.is_a?(String) expr = expr[2..-1].hex.chr if expr =~ /\A#x\h+/ expr.chars.each do |c| @@ -558,9 +562,9 @@ def format_isoebnf(expr, sep: nil, embedded: false) ISOEBNF::TERMINAL_CHARACTER.match?(c) end if expr =~ /"/ - return (@options[:html] ? %('#{expr}') : %('#{expr}')) + return (@options[:html] ? %('#{@coder.encode expr}') : %('#{expr}')) else - return (@options[:html] ? %("#{expr}") : %("#{expr}")) + return (@options[:html] ? %("#{@coder.encode expr}") : %("#{expr}")) end end parts = { diff --git a/spec/ll1/scanner_spec.rb b/spec/ll1/scanner_spec.rb index 74a4e37..360646a 100644 --- a/spec/ll1/scanner_spec.rb +++ b/spec/ll1/scanner_spec.rb @@ -21,7 +21,7 @@ f = double("input") expect(f).to receive(:read).and_return("ascii".force_encoding(Encoding::ASCII_8BIT)) expect(f).to receive(:gets).and_return("utf8".force_encoding(Encoding::UTF_8)) - expect(f).to receive(:eof?).and_return(false, false, true, true) + expect(f).to receive(:eof?).and_return(false, false, true) scanner = EBNF::LL1::Scanner.new(f) s = scanner.rest expect(s).to eq "asciiutf8"