diff --git a/README.md b/README.md index 4acd7436..1c815eb9 100644 --- a/README.md +++ b/README.md @@ -89,11 +89,22 @@ The following options are currently supported: - `smart`: if `true`, straight quotes will be made curly, `--` will be changed to an en dash, `---` will be changed to an em dash, and `...` will be changed to ellipses. -- `safe`: if `true`, raw HTML will not be passed through to HTML - output (it will be replaced by comments), and potentially unsafe - URLs in links and images (those beginning with `javascript:`, - `vbscript:`, `file:`, and with a few exceptions `data:`) will - be replaced with empty strings. +- `safe`: if `true`, raw HTML will be passed through the `sanitize` + function before inserting in the target document. Image urls and + links will be passed through the `isUrlSafe` function to determine if they are considered safe. +- `sanitize`: When the `safe` option is `true` this function will be used to + sanitize the raw html fragments. The function will get the html + (HtmlBlock or HtmlInline) AST node as parameter and should return + the html string replacement as output. The AST node will have a + `literal` property containing the raw html to be sanitized. + Default implementation is: `function(node) { return ''; }` +- `isUrlSafe`: When the `safe` option is `true` this function will be used to + verify if image and link urls are considered safe. The function gets the + string containing the image source or link url as parameter and should return + a truthy value if the url is safe. If unsafe, the src and href attributes + will be omitted from the output html. + Default: Strings beginning with `javascript:`, `vbscript:`, `file:`, and + with a few exceptions `data:` are considered to be 'unsafe' and will be omitted. It is also possible to override the `escape` and `softbreak` properties of a renderer. 
So, to make soft breaks render as hard diff --git a/lib/common.js b/lib/common.js index 605a3bf7..ba128a2c 100644 --- a/lib/common.js +++ b/lib/common.js @@ -92,6 +92,30 @@ var escapeXml = function(s, preserve_entities) { } }; +if (typeof Object.assign != 'function') { + (function () { + Object.assign = function (target) { + 'use strict'; + if (target === undefined || target === null) { + throw new TypeError('Cannot convert undefined or null to object'); + } + + var output = Object(target); + for (var index = 1; index < arguments.length; index++) { + var source = arguments[index]; + if (source !== undefined && source !== null) { + for (var nextKey in source) { + if (source.hasOwnProperty(nextKey)) { + output[nextKey] = source[nextKey]; + } + } + } + } + return output; + }; + })(); +} + module.exports = { unescapeString: unescapeString, normalizeURI: normalizeURI, escapeXml: escapeXml, @@ -99,5 +123,6 @@ module.exports = { unescapeString: unescapeString, OPENTAG: OPENTAG, CLOSETAG: CLOSETAG, ENTITY: ENTITY, - ESCAPABLE: ESCAPABLE + ESCAPABLE: ESCAPABLE, + objectAssign: Object.assign }; diff --git a/lib/html.js b/lib/html.js index 7f274902..3df37ba9 100644 --- a/lib/html.js +++ b/lib/html.js @@ -1,6 +1,9 @@ "use strict"; -var escapeXml = require('./common').escapeXml; +var common = require('./common'); +var escapeXml = common.escapeXml; +var objectAssign = common.objectAssign; + // Helper function to produce an HTML tag. 
var tag = function(name, attrs, selfclosing) { @@ -25,9 +28,8 @@ var reHtmlTag = /\<[^>]*\>/; var reUnsafeProtocol = /^javascript:|vbscript:|file:|data:/i; var reSafeDataProtocol = /^data:image\/(?:png|gif|jpeg|webp)/i; -var potentiallyUnsafe = function(url) { - return reUnsafeProtocol.test(url) && - !reSafeDataProtocol.test(url); +var isUrlSafe = function(url) { + return !reUnsafeProtocol.test(url) || reSafeDataProtocol.test(url); }; var renderNodes = function(block) { @@ -61,6 +63,14 @@ var renderNodes = function(block) { if (options.time) { console.time("rendering"); } + function handleHtml(htmlNode){ + if (options.safe) { + out(options.sanitize(htmlNode)); + } else { + out(htmlNode.literal); + } + } + while ((event = walker.next())) { entering = event.entering; node = event.node; @@ -98,11 +108,7 @@ var renderNodes = function(block) { break; case 'HtmlInline': - if (options.safe) { - out(''); - } else { - out(node.literal); - } + handleHtml(node); break; case 'CustomInline': @@ -115,7 +121,7 @@ var renderNodes = function(block) { case 'Link': if (entering) { - if (!(options.safe && potentiallyUnsafe(node.destination))) { + if (!options.safe || options.isUrlSafe(node.destination)) { attrs.push(['href', esc(node.destination, true)]); } if (node.title) { @@ -130,8 +136,7 @@ var renderNodes = function(block) { case 'Image': if (entering) { if (disableTags === 0) { - if (options.safe && - potentiallyUnsafe(node.destination)) { + if (options.safe && !options.isUrlSafe(node.destination)) { out('');
                     } else {
                         out('<img src='); - } else { - out(node.literal); - } + handleHtml(node); cr(); break; @@ -278,7 +279,7 @@ function HtmlRenderer(options){ // set to "
" to make them hard breaks // set to " " if you want to ignore line wrapping in source escape: escapeXml, - options: options || {}, + options: objectAssign({ sanitize: function() { return ''; }, isUrlSafe: isUrlSafe }, options), render: renderNodes }; } diff --git a/test/sanitize.txt b/test/sanitize.txt new file mode 100644 index 00000000..ab21f627 --- /dev/null +++ b/test/sanitize.txt @@ -0,0 +1,47 @@ +## Sanitization options + +A safe html block should be preserved + +```````````````````````````````` example +Should be preserved: + + + +
+. +

Should be preserved:

+ +
+```````````````````````````````` + +An unsafe html block should be omitted +```````````````````````````````` example +should be omitted: + + +. +

should be omitted:

+ +```````````````````````````````` + +A safe inline html should be preserved +```````````````````````````````` example +Should be preserved:
+. +

Should be preserved:

+```````````````````````````````` + +An unsafe inline html should be omitted +```````````````````````````````` example +Should be omitted: +. +

Should be omitted:

+```````````````````````````````` + +A safe url should be preserved: +```````````````````````````````` example +Should be preserved: ![image](https://saved-by-the-bell) +.

Should be preserved: image

+```````````````````````````````` diff --git a/test/test.js b/test/test.js index a672e3ce..3af4abaa 100755 --- a/test/test.js +++ b/test/test.js @@ -39,6 +39,12 @@ var cursor = { }; var writer = new commonmark.HtmlRenderer(); +var writerSanitized = new commonmark.HtmlRenderer({safe: true, isUrlSafe: function(url){ return url === 'https://saved-by-the-bell'; }, sanitize: function(htmlFragment) { + if(htmlFragment.literal === '' || htmlFragment.literal === '
'){ + return htmlFragment.literal; + }else{ + return ''; + } }}); var reader = new commonmark.Parser(); var readerSmart = new commonmark.Parser({smart: true}); @@ -154,6 +160,10 @@ specTests('test/smart_punct.txt', results, function(z) { return writer.render(readerSmart.parse(z)); }); +specTests('test/sanitize.txt', results, function(z){ + return writerSanitized.render(reader.parse(z)); + }); + // pathological cases cursor.write('Pathological cases:\n');