-
-
Notifications
You must be signed in to change notification settings - Fork 226
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Made sanitization options of the HtmlRenderer pluggable #85
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -89,11 +89,22 @@ The following options are currently supported: | |
- `smart`: if `true`, straight quotes will be made curly, `--` will | ||
be changed to an en dash, `---` will be changed to an em dash, and | ||
`...` will be changed to ellipses. | ||
- `safe`: if `true`, raw HTML will not be passed through to HTML | ||
output (it will be replaced by comments), and potentially unsafe | ||
URLs in links and images (those beginning with `javascript:`, | ||
`vbscript:`, `file:`, and with a few exceptions `data:`) will | ||
be replaced with empty strings. | ||
- `safe`: if `true`, raw HTML will be passed through the `sanitize` | ||
function before inserting in the target document. Image urls and | ||
links will be passed true the `isUrlSafe` function to determin if they are considered safe. | ||
- `sanitize`: When the `safe`option is `true` this function will be used to | ||
sanitize the raw html fragments. The function will get the html | ||
(HtmlBlock or HtmlInline) AST node as parameter and should return | ||
the html string replacement as output. The AST node will have a | ||
`literal` property containing the raw html to be sanitized. | ||
Default implementation is: `function(node) { return '<!-- raw HTML omitted -->'; }` | ||
- `isUrlSafe`: When the `safe` option is `true` this function will be used to | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
verify if image and link url's are considered safe. The function gets the | ||
string containing the image source or link url as parameter and should return | ||
a truthy value if the url is safe. If unsafe, the src and href attributes | ||
will be omitted from the output html. | ||
Default: Strings beginning with `javascript:`, `vbscript:`, `file:`, and | ||
with a few exceptions `data:` are considered to be 'unsafe' and will be omitted. | ||
|
||
It is also possible to override the `escape` and `softbreak` | ||
properties of a renderer. So, to make soft breaks render as hard | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,12 +92,37 @@ var escapeXml = function(s, preserve_entities) { | |
} | ||
}; | ||
|
||
if (typeof Object.assign != 'function') { | ||
(function () { | ||
Object.assign = function (target) { | ||
'use strict'; | ||
if (target === undefined || target === null) { | ||
throw new TypeError('Cannot convert undefined or null to object'); | ||
} | ||
|
||
var output = Object(target); | ||
for (var index = 1; index < arguments.length; index++) { | ||
var source = arguments[index]; | ||
if (source !== undefined && source !== null) { | ||
for (var nextKey in source) { | ||
if (source.hasOwnProperty(nextKey)) { | ||
output[nextKey] = source[nextKey]; | ||
} | ||
} | ||
} | ||
} | ||
return output; | ||
}; | ||
})(); | ||
} | ||
|
||
module.exports = { unescapeString: unescapeString, | ||
normalizeURI: normalizeURI, | ||
escapeXml: escapeXml, | ||
reHtmlTag: reHtmlTag, | ||
OPENTAG: OPENTAG, | ||
CLOSETAG: CLOSETAG, | ||
ENTITY: ENTITY, | ||
ESCAPABLE: ESCAPABLE | ||
ESCAPABLE: ESCAPABLE, | ||
objectAssign: Object.assign | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you explain the |
||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
"use strict"; | ||
|
||
var escapeXml = require('./common').escapeXml; | ||
var common = require('./common'); | ||
var escapeXml = common.escapeXml; | ||
var objectAssign = common.objectAssign; | ||
|
||
|
||
// Helper function to produce an HTML tag. | ||
var tag = function(name, attrs, selfclosing) { | ||
|
@@ -25,9 +28,8 @@ var reHtmlTag = /\<[^>]*\>/; | |
var reUnsafeProtocol = /^javascript:|vbscript:|file:|data:/i; | ||
var reSafeDataProtocol = /^data:image\/(?:png|gif|jpeg|webp)/i; | ||
|
||
var potentiallyUnsafe = function(url) { | ||
return reUnsafeProtocol.test(url) && | ||
!reSafeDataProtocol.test(url); | ||
var isUrlSafe = function(url) { | ||
return !reUnsafeProtocol.test(url) || reSafeDataProtocol.test(url); | ||
}; | ||
|
||
var renderNodes = function(block) { | ||
|
@@ -61,6 +63,14 @@ var renderNodes = function(block) { | |
|
||
if (options.time) { console.time("rendering"); } | ||
|
||
function handleHtml(htmlNode){ | ||
if (options.safe) { | ||
out(options.sanitize(htmlNode)); | ||
} else { | ||
out(htmlNode.literal); | ||
} | ||
} | ||
|
||
while ((event = walker.next())) { | ||
entering = event.entering; | ||
node = event.node; | ||
|
@@ -98,11 +108,7 @@ var renderNodes = function(block) { | |
break; | ||
|
||
case 'HtmlInline': | ||
if (options.safe) { | ||
out('<!-- raw HTML omitted -->'); | ||
} else { | ||
out(node.literal); | ||
} | ||
handleHtml(node); | ||
break; | ||
|
||
case 'CustomInline': | ||
|
@@ -115,7 +121,7 @@ var renderNodes = function(block) { | |
|
||
case 'Link': | ||
if (entering) { | ||
if (!(options.safe && potentiallyUnsafe(node.destination))) { | ||
if (!options.safe || options.isUrlSafe(node.destination)) { | ||
attrs.push(['href', esc(node.destination, true)]); | ||
} | ||
if (node.title) { | ||
|
@@ -130,8 +136,7 @@ var renderNodes = function(block) { | |
case 'Image': | ||
if (entering) { | ||
if (disableTags === 0) { | ||
if (options.safe && | ||
potentiallyUnsafe(node.destination)) { | ||
if (options.safe && !options.isUrlSafe(node.destination)) { | ||
out('<img src="" alt="'); | ||
} else { | ||
out('<img src="' + esc(node.destination, true) + | ||
|
@@ -237,11 +242,7 @@ var renderNodes = function(block) { | |
|
||
case 'HtmlBlock': | ||
cr(); | ||
if (options.safe) { | ||
out('<!-- raw HTML omitted -->'); | ||
} else { | ||
out(node.literal); | ||
} | ||
handleHtml(node); | ||
cr(); | ||
break; | ||
|
||
|
@@ -278,7 +279,7 @@ function HtmlRenderer(options){ | |
// set to "<br />" to make them hard breaks | ||
// set to " " if you want to ignore line wrapping in source | ||
escape: escapeXml, | ||
options: options || {}, | ||
options: objectAssign({ sanitize: function() { return '<!-- raw HTML omitted -->'; }, isUrlSafe: isUrlSafe }, options), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One could instead do
and then as before
which would avoid the need for all that |
||
render: renderNodes | ||
}; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
## Sanitization options | ||
|
||
A safe html block should be preserved | ||
|
||
```````````````````````````````` example | ||
Should be preserved: | ||
|
||
<table> | ||
|
||
</table> | ||
. | ||
<p>Should be preserved:</p> | ||
<table> | ||
</table> | ||
```````````````````````````````` | ||
|
||
An unsafe html block should be omitted | ||
```````````````````````````````` example | ||
should be omitted: | ||
|
||
<script> | ||
</script> | ||
. | ||
<p>should be omitted:</p> | ||
<!-- unsafe html omitted --> | ||
```````````````````````````````` | ||
|
||
A safe inline html should be preserved | ||
```````````````````````````````` example | ||
Should be preserved: <table></table> | ||
. | ||
<p>Should be preserved: <table></table></p> | ||
```````````````````````````````` | ||
|
||
An unsafe inline html should be omitted | ||
```````````````````````````````` example | ||
Should be omitted: <script></script> | ||
. | ||
<p>Should be omitted: <!-- unsafe html omitted --><!-- unsafe html omitted --></p> | ||
```````````````````````````````` | ||
|
||
An safe url should be preserved: | ||
```````````````````````````````` example | ||
Should be preserved: ![image](https://saved-by-the-bell) | ||
. | ||
<p>Should be preserved: <img src="https://saved-by-the-bell" alt="image" /></p> | ||
```````````````````````````````` |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
missing space before option