From 12fa08836aac64eafa2e8f7d2473d44bebb09b3c Mon Sep 17 00:00:00 2001 From: Joel Low Date: Mon, 30 May 2016 12:32:15 +0800 Subject: [PATCH] Add support for the preprocess_html flag. This adds support for converting consecutive spaces/nbsp to plain spaces for use with the Rouge lexer. --- lib/html/pipeline/rouge_filter.rb | 20 ++++++++++++++++---- test/rouge_filter_test.rb | 10 ++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/lib/html/pipeline/rouge_filter.rb b/lib/html/pipeline/rouge_filter.rb index 17ba981..9a67e03 100644 --- a/lib/html/pipeline/rouge_filter.rb +++ b/lib/html/pipeline/rouge_filter.rb @@ -13,8 +13,7 @@ def call default = must_str(context[:highlight]) next unless lang = node["lang"] || default next unless lexer = lexer_for(lang) - node.css("br").each { |br| br.replace("\n") } if replace_br - text = node.inner_text + text = preprocess_html(node) html = highlight_with(lexer, text) next if html.nil? @@ -28,6 +27,15 @@ def call doc end + def preprocess_html(node) + node.css("br").each { |br| br.replace("\n") } if replace_br? + result = node.inner_text + return result unless preprocess_html? + + result.tr!("\u00A0", ' ') + result + end + def highlight_with(lexer, text) formatter.format(lexer.lex(text)) end @@ -40,8 +48,12 @@ def line_numbers context[:line_numbers] || false end - def replace_br - context[:replace_br] || false + def replace_br? + context[:replace_br] || preprocess_html? + end + + def preprocess_html? + context[:preprocess_html] || false end def formatter(css_class: default_css_class) diff --git a/test/rouge_filter_test.rb b/test/rouge_filter_test.rb index 73ce422..b52008d 100644 --- a/test/rouge_filter_test.rb +++ b/test/rouge_filter_test.rb @@ -95,6 +95,16 @@ def test_replacing_br doc = filter.call assert_equal "
"\
+                 "hello\n"\
+                 "world
\n", doc.to_html + end + + def test_preprocess_html + filter = RougeFilter.new \ + "
  hello
world
", preprocess_html: true + + doc = filter.call + assert_equal "
  "\
                  "hello\nworld
\n", doc.to_html end end